Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
237 files changed, 46913 insertions, 6629 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 9221e5489069..2e98c016cb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config DRM_AMDGPU_SI bool "Enable amdgpu support for SI parts" depends on DRM_AMDGPU @@ -26,10 +27,12 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU + depends on MMU + select HMM_MIRROR select MMU_NOTIFIER help - This option selects CONFIG_MMU_NOTIFIER if it isn't already - selected to enabled full userptr support. + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it + isn't already selected to enabled full userptr support. config DRM_AMDGPU_GART_DEBUGFS bool "Allow GART access through debugfs" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 466da5954a68..ca0e435559d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -23,7 +23,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -FULL_AMD_PATH=$(src)/.. +FULL_AMD_PATH=$(srctree)/$(src)/.. DISPLAY_FOLDER_NAME=display FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) @@ -49,11 +49,15 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o + amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ + amdgpu_umc.o smu_v11_0_i2c.o + +amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -63,7 +67,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o nbio_v7_4.o + vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ + arct_reg_init.o navi12_reg_init.o mxgpu_nv.o # add DF block amdgpu-y += \ @@ -74,7 +79,12 @@ amdgpu-y += \ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ - gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ + gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o + +# add UMC block +amdgpu-y += \ + umc_v6_1.o umc_v6_0.o # add IH block amdgpu-y += \ @@ -83,14 +93,16 @@ amdgpu-y += \ iceland_ih.o \ tonga_ih.o \ cz_ih.o \ - vega10_ih.o + vega10_ih.o \ + navi10_ih.o # add PSP block amdgpu-y += \ amdgpu_psp.o \ psp_v3_1.o \ psp_v10_0.o \ - psp_v11_0.o + psp_v11_0.o \ + psp_v12_0.o # add SMC block amdgpu-y += \ @@ -107,14 +119,20 @@ amdgpu-y += \ amdgpu_gfx.o \ amdgpu_rlc.o \ gfx_v8_0.o \ - gfx_v9_0.o + gfx_v9_0.o \ + gfx_v10_0.o # add async DMA block amdgpu-y += \ amdgpu_sdma.o \ sdma_v2_4.o \ sdma_v3_0.o \ - sdma_v4_0.o + sdma_v4_0.o \ + sdma_v5_0.o + +# add MES block +amdgpu-y += 
\ + mes_v10_1.o # add UVD block amdgpu-y += \ @@ -132,7 +150,14 @@ amdgpu-y += \ # add VCN block amdgpu-y += \ amdgpu_vcn.o \ - vcn_v1_0.o + vcn_v1_0.o \ + vcn_v2_0.o \ + vcn_v2_5.o + +# add ATHUB block +amdgpu-y += \ + athub_v1_0.o \ + athub_v2_0.o # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o @@ -145,7 +170,9 @@ amdgpu-y += \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o \ - amdgpu_amdkfd_gfx_v9.o + amdgpu_amdkfd_gfx_v9.o \ + amdgpu_amdkfd_arcturus.o \ + amdgpu_amdkfd_gfx_v10.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) amdgpu-y += amdgpu_amdkfd_gfx_v7.o @@ -172,7 +199,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o include $(FULL_AMD_PATH)/powerplay/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8d0d7f3dd5fb..0c229a92a24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -44,9 +44,9 @@ #include <drm/ttm/ttm_module.h> #include <drm/ttm/ttm_execbuf_util.h> -#include <drm/drmP.h> -#include <drm/drm_gem.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_gem.h> +#include <drm/drm_ioctl.h> #include <drm/gpu_scheduler.h> #include <kgd_kfd_interface.h> @@ -73,6 +73,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" +#include "amdgpu_nbio.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_csa.h" @@ -83,6 +84,11 @@ #include "amdgpu_gem.h" #include "amdgpu_doorbell.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_smu.h" +#include "amdgpu_discovery.h" +#include "amdgpu_mes.h" +#include "amdgpu_umc.h" +#include "amdgpu_mmhub.h" #define MAX_GPU_INSTANCE 16 @@ -101,6 +107,8 @@ struct amdgpu_mgpu_info uint32_t num_apu; }; +#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256 + /* * Modules parameters. 
*/ @@ -117,7 +125,7 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; -extern int amdgpu_lockup_timeout; +extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -131,6 +139,7 @@ extern int amdgpu_vm_fragment_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; extern int amdgpu_vm_update_mode; +extern int amdgpu_exp_hw_support; extern int amdgpu_dc; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; @@ -142,12 +151,7 @@ extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; -extern int amdgpu_vram_page_split; -extern int amdgpu_ngg; -extern int amdgpu_prim_buf_per_se; -extern int amdgpu_pos_buf_per_se; -extern int amdgpu_cntl_sb_buf_per_se; -extern int amdgpu_param_buf_per_se; +extern uint amdgpu_force_long_training; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; @@ -155,7 +159,21 @@ extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_dm_abm_level; extern struct amdgpu_mgpu_info mgpu_info; +extern int amdgpu_ras_enable; +extern uint amdgpu_ras_mask; +extern int amdgpu_async_gfx_ring; +extern int amdgpu_mcbp; +extern int amdgpu_discovery; +extern int amdgpu_mes; +extern int amdgpu_noretry; +extern int amdgpu_force_asic_type; +#ifdef CONFIG_HSA_AMD +extern int sched_policy; +#else +static const int sched_policy = KFD_SCHED_POLICY_HWS; +#endif #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -208,9 +226,11 @@ struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; struct amdgpu_atif; +struct kfd_vm_fault_info; enum amdgpu_cp_irq { - AMDGPU_CP_IRQ_GFX_EOP = 0, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0, + AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP, AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP, AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP, AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP, @@ -270,6 +290,9 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; +#define HW_REV(_Major, _Minor, _Rev) \ + ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) + struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -433,6 +456,12 @@ struct amdgpu_cs_chunk { void *kdata; }; +struct amdgpu_cs_post_dep { + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + u64 point; +}; + struct amdgpu_cs_parser { struct amdgpu_device *adev; struct drm_file *filp; @@ -457,13 +486,12 @@ struct amdgpu_cs_parser { uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; uint64_t bytes_moved_vis; - struct amdgpu_bo_list_entry *evictable; /* user fence */ struct amdgpu_bo_list_entry uf_entry; - unsigned num_post_dep_syncobjs; - struct drm_syncobj **post_dep_syncobjs; + unsigned num_post_deps; + struct amdgpu_cs_post_dep *post_deps; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, @@ -514,6 +542,14 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; +enum amd_reset_method { + AMD_RESET_METHOD_LEGACY = 0, + AMD_RESET_METHOD_MODE0, + AMD_RESET_METHOD_MODE1, + AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_BACO +}; + /* * ASIC specific functions. 
*/ @@ -525,6 +561,7 @@ struct amdgpu_asic_funcs { u32 sh_num, u32 reg_offset, u32 *value); void (*set_vga_state)(struct amdgpu_device *adev, bool state); int (*reset)(struct amdgpu_device *adev); + enum amd_reset_method (*reset_method)(struct amdgpu_device *adev); /* get the reference clock */ u32 (*get_xclk)(struct amdgpu_device *adev); /* MM block clocks */ @@ -549,6 +586,8 @@ struct amdgpu_asic_funcs { uint64_t *count1); /* do we need to reset the asic at init time (e.g., kexec) */ bool (*need_reset_on_init)(struct amdgpu_device *adev); + /* PCIe replay counter */ + uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); }; /* @@ -593,6 +632,11 @@ struct amdgpu_fw_vram_usage { u64 size; struct amdgpu_bo *reserved_bo; void *va; + + /* Offset on the top of VRAM, used as c2p write buffer. + */ + u64 mem_train_fb_loc; + bool mem_train_support; }; /* @@ -607,60 +651,20 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); +typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); +typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); + typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); - -/* - * amdgpu nbio functions - * - */ -struct nbio_hdp_flush_reg { - u32 ref_and_mask_cp0; - u32 ref_and_mask_cp1; - u32 ref_and_mask_cp2; - u32 ref_and_mask_cp3; - u32 ref_and_mask_cp4; - u32 ref_and_mask_cp5; - u32 ref_and_mask_cp6; - u32 ref_and_mask_cp7; - u32 ref_and_mask_cp8; - u32 ref_and_mask_cp9; - u32 ref_and_mask_sdma0; - u32 ref_and_mask_sdma1; -}; - -struct amdgpu_nbio_funcs { - const struct nbio_hdp_flush_reg *hdp_flush_reg; - u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); - u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); - u32 (*get_rev_id)(struct amdgpu_device *adev); - void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - u32 (*get_memsize)(struct amdgpu_device *adev); - void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index, int doorbell_size); - void (*enable_doorbell_aperture)(struct amdgpu_device *adev, - bool enable); - void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, - bool enable); - void (*ih_doorbell_range)(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*ih_control)(struct amdgpu_device *adev); - void (*init_registers)(struct amdgpu_device *adev); - void (*detect_hw_virt)(struct amdgpu_device *adev); +struct amdgpu_mmio_remap { + u32 reg_offset; + resource_size_t bus_addr; }; struct amdgpu_df_funcs { - void (*init)(struct amdgpu_device *adev); + void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); @@ -671,6 +675,15 @@ 
struct amdgpu_df_funcs { u32 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); }; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { @@ -678,6 +691,12 @@ enum amd_hw_ip_block_type { HDP_HWIP, SDMA0_HWIP, SDMA1_HWIP, + SDMA2_HWIP, + SDMA3_HWIP, + SDMA4_HWIP, + SDMA5_HWIP, + SDMA6_HWIP, + SDMA7_HWIP, MMHUB_HWIP, ATHUB_HWIP, NBIO_HWIP, @@ -694,18 +713,20 @@ enum amd_hw_ip_block_type { NBIF_HWIP, THM_HWIP, CLK_HWIP, + UMC_HWIP, + RSMU_HWIP, MAX_HWIP }; -#define HWIP_MAX_INSTANCE 6 +#define HWIP_MAX_INSTANCE 8 struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; - uint32_t pp_feature; }; #define AMDGPU_RESET_MAGIC_NUM 64 +#define AMDGPU_MAX_DF_PERFMONS 4 struct amdgpu_device { struct device *dev; struct drm_device *ddev; @@ -724,7 +745,6 @@ struct amdgpu_device { int usec_timeout; const struct amdgpu_asic_funcs *asic_funcs; bool shutdown; - bool need_dma32; bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; @@ -732,6 +752,7 @@ struct amdgpu_device { struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; unsigned debugfs_count; #if defined(CONFIG_DEBUG_FS) + struct dentry *debugfs_preempt; struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif struct amdgpu_atif *atif; @@ -741,12 +762,14 @@ struct amdgpu_device { struct mutex grbm_idx_mutex; struct dev_pm_domain vga_pm_domain; bool have_disp_power_ref; + bool have_atomics_support; /* BIOS */ bool is_atom_fw; uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stolen_vga_memory; + struct amdgpu_bo *discovery_memory; uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -756,6 +779,7 @@ struct amdgpu_device { void __iomem *rmmio; /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; + struct amdgpu_mmio_remap rmmio_remap; /* protects concurrent SMC based register access */ spinlock_t smc_idx_lock; amdgpu_rreg_t smc_rreg; @@ -766,6 +790,8 @@ struct amdgpu_device { amdgpu_wreg_t pcie_wreg; amdgpu_rreg_t pciep_rreg; amdgpu_wreg_t pciep_wreg; + amdgpu_rreg64_t pcie_rreg64; + amdgpu_wreg64_t pcie_wreg64; /* protects concurrent UVD register access */ spinlock_t uvd_ctx_idx_lock; amdgpu_rreg_t uvd_ctx_rreg; @@ -799,6 +825,7 @@ struct amdgpu_device { dma_addr_t dummy_page_addr; struct amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; + unsigned num_vmhubs; /* memory management */ struct amdgpu_mman mman; @@ -825,6 +852,7 @@ struct amdgpu_device { /* For pre-DCE11. 
DCE11 and later are in "struct amdgpu_device->dm" */ struct work_struct hotplug_work; struct amdgpu_irq_src crtc_irq; + struct amdgpu_irq_src vupdate_irq; struct amdgpu_irq_src pageflip_irq; struct amdgpu_irq_src hpd_irq; @@ -842,11 +870,20 @@ struct amdgpu_device { struct amd_powerplay powerplay; bool pp_force_state_enabled; + /* smu */ + struct smu_context smu; + /* dpm */ struct amdgpu_pm pm; u32 cg_flags; u32 pg_flags; + /* nbio */ + struct amdgpu_nbio nbio; + + /* mmhub */ + struct amdgpu_mmhub mmhub; + /* gfx */ struct amdgpu_gfx gfx; @@ -874,9 +911,19 @@ struct amdgpu_device { /* KFD */ struct amdgpu_kfd_dev kfd; + /* UMC */ + struct amdgpu_umc umc; + /* display related functionality */ struct amdgpu_display_manager dm; + /* discovery */ + uint8_t *discovery; + + /* mes */ + bool enable_mes; + struct amdgpu_mes mes; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -890,11 +937,10 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - const struct amdgpu_nbio_funcs *nbio_funcs; const struct amdgpu_df_funcs *df_funcs; /* delayed work_func for deferring clockgating during resume */ - struct delayed_work late_init_work; + struct delayed_work delayed_init_work; struct amdgpu_virt virt; /* firmware VRAM reservation */ @@ -917,11 +963,25 @@ struct amdgpu_device { /* record last mm index being written through WREG32*/ unsigned long last_mm_index; bool in_gpu_reset; + enum pp_mp1_state mp1_state; struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + struct mutex notifier_lock; + int asic_reset_res; struct work_struct xgmi_reset_work; + + long gfx_timeout; + long sdma_timeout; + long video_timeout; + long compute_timeout; + + uint64_t unique_id; + uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; + + /* device pstate */ + int pstate; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -936,6 +996,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, void amdgpu_device_fini(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); +void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, + uint32_t *buf, size_t size, bool write); uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, @@ -975,6 +1037,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) +#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) +#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v)) #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg)) @@ -1035,6 +1099,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state)) #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) +#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev)) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define 
amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) @@ -1051,6 +1116,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) +#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1067,6 +1134,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); @@ -1156,5 +1226,24 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev ); static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } #endif + +void amdgpu_register_gpu_instance(struct amdgpu_device *adev); +void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); + #include "amdgpu_object.h" + +/* used by df_v3_6.c and amdgpu_pmu.c */ +#define AMDGPU_PMU_ATTR(_name, _object) \ +static ssize_t \ +_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1); \ + return sprintf(page, _object "\n"); \ +} \ + \ +static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) + #endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 0a4fba196b84..82155ac3288a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -24,6 +24,7 @@ */ #include <linux/irqdomain.h> +#include <linux/pci.h> #include <linux/pm_domain.h> #include <linux/platform_device.h> #include <sound/designware_i2s.h> @@ -188,7 +189,7 @@ static int acp_hw_init(void *handle) u32 val = 0; u32 count = 0; struct device *dev; - struct i2s_platform_data *i2s_pdata; + struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -230,20 +231,21 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - if (adev->acp.acp_cell == NULL) - return -ENOMEM; + if (adev->acp.acp_cell == NULL) { + r = -ENOMEM; + goto failure; + } adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); if (adev->acp.acp_res == NULL) { - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); if (i2s_pdata == NULL) { - kfree(adev->acp.acp_res); - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } switch (adev->asic_type) { @@ -340,14 +342,14 @@ static int acp_hw_init(void *handle) r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); if (r) - return r; + goto failure; for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) { dev_err(dev, "Failed to add dev to genpd\n"); - return r; + goto failure; } } @@ -366,7 
+368,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -383,7 +386,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -392,6 +396,13 @@ static int acp_hw_init(void *handle) val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); return 0; + +failure: + kfree(i2s_pdata); + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_cell); + kfree(adev->acp.acp_genpd); + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4376b17ca594..1e41367ef74e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -27,7 +27,7 @@ #include <linux/power_supply.h> #include <linux/pm_runtime.h> #include <acpi/video.h> -#include <drm/drmP.h> + #include <drm/drm_crtc_helper.h> #include "amdgpu.h" #include "amdgpu_pm.h" @@ -464,8 +464,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, } } if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { - if ((adev->flags & AMD_IS_PX) && - amdgpu_atpx_dgpu_req_power_for_displays()) { + if (adev->flags & AMD_IS_PX) { pm_runtime_get_sync(adev->ddev->dev); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(adev->ddev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c index 3889486f71fe..a4d65973bf7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c @@ -25,7 +25,7 @@ */ #include <linux/hdmi.h> #include <linux/gcd.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fe1d7368c1e6..d3da9dde4ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -22,11 +22,13 @@ #include "amdgpu_amdkfd.h" #include "amd_shared.h" -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_dma_buf.h" #include <linux/module.h> #include <linux/dma-buf.h> +#include "amdgpu_xgmi.h" static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -61,36 +63,10 @@ void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { - const struct kfd2kgd_calls *kfd2kgd; - - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_KAVERI: - case CHIP_HAWAII: - kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); - break; -#endif - case CHIP_CARRIZO: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); - break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); - break; - default: - dev_info(adev->dev, "kfd not supported on this ASIC\n"); - return; - } + bool vf = amdgpu_sriov_vf(adev); adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->pdev, adev->asic_type, vf); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -148,21 +124,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) }; /* this is going to have a few of the MSBs set that we need 
to - * clear */ + * clear + */ bitmap_complement(gpu_resources.queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); - /* remove the KIQ bit as well */ - if (adev->gfx.kiq.ring.sched.ready) - clear_bit(amdgpu_gfx_queue_to_bit(adev, - adev->gfx.kiq.ring.me - 1, - adev->gfx.kiq.ring.pipe, - adev->gfx.kiq.ring.queue), - gpu_resources.queue_bitmap); - /* According to linux/bitmap.h we shouldn't use bitmap_clear if - * nbits is not compile time constant */ + * nbits is not compile time constant + */ last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; @@ -189,7 +159,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->doorbell_index.last_non_cp; } - kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources); } } @@ -335,6 +305,77 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, + void **mem_obj) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = 1; + bp.domain = AMDGPU_GEM_DOMAIN_GWS; + bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &bo); + if (r) { + dev_err(adev->dev, + "failed to allocate gws BO for amdkfd (%d)\n", r); + return r; + } + + *mem_obj = bo; + return 0; +} + +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +{ + struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; + + amdgpu_bo_unref(&bo); +} + +uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, + enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + switch (type) { + case KGD_ENGINE_PFP: + return adev->gfx.pfp_fw_version; + + case KGD_ENGINE_ME: + return adev->gfx.me_fw_version; + + case KGD_ENGINE_CE: + return adev->gfx.ce_fw_version; + + case KGD_ENGINE_MEC1: + return adev->gfx.mec_fw_version; + + case KGD_ENGINE_MEC2: + return adev->gfx.mec2_fw_version; + + case KGD_ENGINE_RLC: + return adev->gfx.rlc_fw_version; + + case KGD_ENGINE_SDMA1: + return adev->sdma.instance[0].fw_version; + + case KGD_ENGINE_SDMA2: + return adev->sdma.instance[1].fw_version; + + default: + return 0; + } + + return 0; +} + void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info) { @@ -361,9 +402,12 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; - else if (adev->powerplay.pp_funcs) - mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; - else + else if (adev->powerplay.pp_funcs) { + if (amdgpu_emu_mode == 1) + mem_info->mem_clk_max = 0; + else + mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; + } else mem_info->mem_clk_max = 100; } @@ -481,6 +525,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) return adev->gmc.xgmi.hive_id; } +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) +{ + struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; + struct amdgpu_device *adev = (struct amdgpu_device *)dst; + int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); + + if (ret < 0) { + DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. 
ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, ret); + ret = 0; + } + return (uint8_t)ret; +} + +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rmmio_remap.bus_addr; +} + +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->gds.gws_size; +} int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, @@ -541,8 +613,12 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) + if (is_support_sw_smu(adev)) + smu_switch_power_profile(&adev->smu, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); @@ -558,6 +634,13 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->have_atomics_support; +} + #ifndef CONFIG_HSA_AMD bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) { @@ -583,28 +666,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) return 0; } -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return NULL; -} - struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g) + unsigned int asic_type, bool vf) { return NULL; } bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { return false; @@ -640,4 +709,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { } + +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0b31a1859023..069d5d230810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -57,11 +57,10 @@ struct kgd_mem { unsigned int mapped_to_gpu_memory; uint64_t va; - uint32_t mapping_flags; + uint32_t alloc_flags; atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; @@ -81,6 +80,18 @@ struct amdgpu_kfd_dev { uint64_t vram_used; }; +enum kgd_engine_type { + KGD_ENGINE_PFP = 1, + KGD_ENGINE_ME, + KGD_ENGINE_CE, + KGD_ENGINE_MEC1, + KGD_ENGINE_MEC2, + KGD_ENGINE_RLC, + KGD_ENGINE_SDMA1, + KGD_ENGINE_SDMA2, + KGD_ENGINE_MAX +}; + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); @@ -124,10 +135,7 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); - -struct kfd2kgd_calls 
*amdgpu_amdkfd_gfx_7_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -142,6 +150,12 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); +uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, + enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info); uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); @@ -155,11 +169,21 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); - +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); + +/* Read user wptr from a specified user address space with page fault + * disabled. The memory must be pinned and mapped to the hardware when + * this is called in hqd_load functions, so it should never fault in + * the first place. This resolves a circular lock dependency involving + * four locks, including the DQM lock and mmap_sem. + */ #define read_user_wptr(mmptr, wptr, dst) \ ({ \ bool valid = false; \ if ((mmptr) && (wptr)) { \ + pagefault_disable(); \ if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ @@ -167,6 +191,7 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); valid = !get_user((dst), (wptr)); \ unuse_mm(mmptr); \ } \ + pagefault_enable(); \ } \ valid; \ }) @@ -217,8 +242,9 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g); + unsigned int asic_type, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd); @@ -230,5 +256,6 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c new file mode 100644 index 000000000000..b6713e0ed1b2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -0,0 +1,284 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/mmu_context.h> +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "sdma0/sdma0_4_2_2_offset.h" +#include "sdma0/sdma0_4_2_2_sh_mask.h" +#include "sdma1/sdma1_4_2_2_offset.h" +#include "sdma1/sdma1_4_2_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_amdkfd_gfx_v9.h" + +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t sdma_engine_reg_base[8] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL + }; + + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); + + return retval; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct 
mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = 
get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +const struct kfd2kgd_calls arcturus_kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, + .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c new file mode 100644 index 000000000000..61cd707158e4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -0,0 +1,838 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <linux/mmu_context.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "navi10_enum.h" +#include "athub/athub_2_0_0_offset.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "oss/osssys_5_0_0_offset.h" +#include "oss/osssys_5_0_0_sh_mask.h" +#include "soc15_common.h" +#include "v10_structs.h" +#include "nv.h" +#include "nvd.h" +#include "gfxhub_v2_0.h" + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES, + SAVE_WAVES +}; + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; +#if 0 +/* TODO - confirm REG_GET_FIELD x2, should be OK as is... 
but + * MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu + * changes commented out related code, doing the same here for now but + * need to sync with Ken et al + */ + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); +#endif + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping); + + pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + +#if 0 + /* TODO: uncomment this code when the hardware support is ready. 
*/ + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + pr_debug("ATHUB mapping update finished\n"); + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); +#endif + + /* Mapping vmid to pasid also for IH block */ + pr_debug("update mapping for IH block and mmhub"); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t sdma_engine_reg_base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + /* On gfx10, mmSDMA1_xxx registers are defined NOT based + * on SDMA1 base address (dw 0x1860) but based on SDMA0 + * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL + * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc + * below + */ + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL + }; + + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); + + return retval; +} + +#if 0 +static uint32_t get_watch_base_addr(struct amdgpu_device *adev) +{ + uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) - + mmTCP_WATCH0_ADDR_H; + + pr_debug("kfd: reg watch base address: 0x%x\n", retval); + + return retval; +} +#endif + +static inline struct v10_compute_mqd *get_mqd(void *mqd) +{ + return (struct v10_compute_mqd *)mqd; +} + +static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v10_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_compute_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. 
*/ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uint64_t)wptr)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = 
(uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + 
high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v10_compute_mqd *m = get_mqd(mqd); + +#if 0 + unsigned long flags; + int retry; +#endif + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + +#if 0 /* Is this still needed? */ + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); +#endif + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t temp; + 
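/* Editor's note (illustrative sketch, not part of the patch): utimeout is a
 * timeout in milliseconds, so "(utimeout * HZ / 1000) + jiffies" on the next
 * line computes the polling deadline in jiffies. The same conversion could
 * also be written with the standard helper, e.g.:
 *
 *	unsigned long end_jiffies = jiffies + msecs_to_jiffies(utimeout);
 *
 * The loop below then polls SDMA0_RLC0_CONTEXT_STATUS for the IDLE bit,
 * sleeping usleep_range(500, 1000) per iteration, and returns -ETIME if the
 * deadline passes before the engine goes idle.
 */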
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid)); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + 
mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SA_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* SDMA is on gfxhub as well for Navi1* series */ + gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base); +} + +const struct kfd2kgd_calls gfx_v10_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ff7fac7df34b..6e6f0a99ec06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,16 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" #include "cik_sdma.h" -#include "amdgpu_ucode.h" #include "gfx_v7_0.h" #include "gca/gfx_7_2_d.h" #include "gca/gfx_7_2_enum.h" @@ -88,66 +84,6 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); - -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); - -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); -static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ @@ -173,38 +109,6 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .get_fw_version = get_fw_version, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -307,14 +211,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", + m->sdma_engine_id, m->sdma_queue_id, retval); return retval; } @@ -417,60 +322,52 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; unsigned long end_jiffies; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t data; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - if (m->sdma_engine_id) { - data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); - } else { - data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); - } data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - 
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdma_rlc_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdma_rlc_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -528,13 +425,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -649,32 +546,34 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdma_rlc_rb_rptr = 
RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); return 0; } @@ -762,24 +661,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -792,63 +683,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, unlock_srbm(kgd); } -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - const union amdgpu_firmware_header *hdr; - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; - break; - - case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} - static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { @@ -916,3 +750,28 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } + +const struct kfd2kgd_calls gfx_v7_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = 
get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 56ea929f524b..bfbddedb2380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,15 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_ucode.h" #include "gfx_v8_0.h" #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_d.h" @@ -46,63 +41,6 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ @@ -128,39 +66,6 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .get_fw_version = get_fw_version, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -264,13 +169,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) +static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", + m->sdma_engine_id, m->sdma_queue_id, retval); return retval; } @@ -402,59 +309,51 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t data; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - if (m->sdma_engine_id) { - data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); - } else { - data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); - } data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + 
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -521,13 +420,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -645,54 +544,48 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); return 0; } -static bool 
get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -751,63 +644,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, unlock_srbm(kgd); } -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - const union amdgpu_firmware_header *hdr; - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; - break; - - case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} - static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { @@ -859,3 +695,28 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) RREG32(mmVM_INVALIDATE_RESPONSE); return 0; } + +const struct kfd2kgd_calls gfx_v8_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 5c51d4910650..47c853ef1051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,19 +19,10 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -#define pr_fmt(fmt) "kfd2kgd: " fmt - -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_ucode.h" -#include "soc15_hw_ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" @@ -49,78 +40,20 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" +#include "gmc_v9_0.h" -#define V9_PIPE_PER_MEC (4) -#define V9_QUEUES_PER_PIPE_MEC (8) - enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -138,40 +71,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .get_fw_version = get_fw_version, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = amdgpu_amdkfd_get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .get_hive_id = amdgpu_amdkfd_get_hive_id, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -219,7 +118,7 @@ static void release_queue(struct kgd_dev *kgd) unlock_srbm(kgd); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, @@ -229,14 +128,14 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -297,7 +196,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; @@ -317,22 +216,21 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t base[2] = { + uint32_t sdma_engine_reg_base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, SOC15_REG_OFFSET(SDMA1, 0, 
mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL }; - uint32_t retval; - - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); return retval; } @@ -347,7 +245,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm) @@ -373,7 +271,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); } /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ @@ -382,13 +280,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32(reg, mqd_hqd[reg - hqd_base]); + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); if (wptr) { /* Don't read wptr with get_user because the user @@ -417,32 +315,32 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); release_queue(kgd); return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { @@ -479,71 +377,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); 
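/* Editor's note (illustrative sketch, not part of the patch): the renamed
 * get_sdma_rlc_reg_offset() above derives a per-queue register block offset
 * in two steps: subtracting mmSDMA*_RLC0_RB_CNTL from the SOC15 absolute
 * offset yields the engine's base, and queue_id is scaled by the distance
 * between two adjacent RLC register blocks, roughly:
 *
 *	base   = SOC15_REG_OFFSET(SDMAn, 0, mmSDMAn_RLC0_RB_CNTL)
 *		 - mmSDMAn_RLC0_RB_CNTL;
 *	offset = base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 *
 * (SDMAn here is schematic for the selected engine.) The SDMA load, dump and
 * destroy paths then address individual registers as
 * "offset + mmSDMA0_RLC0_<reg>".
 */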
struct v9_sdma_mqd *m; - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdmax_gfx_context_cntl = m->sdma_engine_id ? - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - data = RREG32(sdmax_gfx_context_cntl); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -553,7 +447,8 
@@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) @@ -563,15 +458,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -579,7 +474,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -605,14 +500,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -620,7 +515,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) @@ -637,7 +532,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) - WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -651,7 +546,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, break; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -675,84 +570,56 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + 
temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. 
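The two single-purpose ATC helpers above (mapping-valid and mapping-pasid) are folded into one kgd_gfx_v9_get_atc_vmid_pasid_mapping_info() that reads ATC_VMID<n>_PASID_MAPPING once and reports both pieces. A minimal stand-alone sketch of that decode, assuming the usual field layout (PASID in bits 15:0, valid flag in bit 31) instead of the generated register headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in masks; the driver uses the generated ATC_VMID0_PASID_MAPPING
 * defines (assumed layout: PASID in bits 15:0, VALID in bit 31). */
#define PASID_MAPPING_PASID_MASK  0x0000ffffu
#define PASID_MAPPING_VALID_MASK  0x80000000u

/* Decode one mapping register value: report the PASID through *p_pasid and
 * return whether the mapping is currently valid. */
static bool get_atc_vmid_pasid_mapping_info(uint32_t value, uint16_t *p_pasid)
{
        *p_pasid = value & PASID_MAPPING_PASID_MASK;
        return !!(value & PASID_MAPPING_VALID_MASK);
}

int main(void)
{
        uint16_t pasid;
        bool valid = get_atc_vmid_pasid_mapping_info(0x80000123u, &pasid);

        printf("valid=%d pasid=0x%04x\n", valid, (unsigned)pasid); /* valid=1 pasid=0x0123 */
        return 0;
}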
- * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); -} - -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type) { signed long r; uint32_t seq; @@ -765,7 +632,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) PACKET3_INVALIDATE_TLBS_DST_SEL(1) | PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); @@ -779,52 +646,78 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) return 0; } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; + int vmid, i; + uint16_t queried_pasid; + bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + uint32_t flush_type = 0; if (adev->in_gpu_reset) return -EIO; + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); + return invalidate_tlbs_with_kiq(adev, pasid, flush_type); for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - write_vmid_invalidate_request(kgd, vmid); - break; - } + + ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + i, flush_type); + break; } } return 0; } -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int i; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("non kfd vmid %d\n", vmid); return 0; } - write_vmid_invalidate_request(kgd, vmid); + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. 
+ * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + return 0; } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -833,7 +726,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, uint32_t gfx_index_val, uint32_t sq_cmd) { @@ -842,7 +735,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, mutex_lock(&adev->grbm_idx_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, @@ -852,79 +745,20 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid) -{ - /* No longer needed on GFXv9. The scratch base address is - * passed to the shader by the CP. It's the user mode driver's - * responsibility. - */ -} - -/* FIXME: Does this need to be ASIC-specific code? */ -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - const union amdgpu_firmware_header *hdr; - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; - break; - - case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} - -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -939,7 +773,37 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, * now, all processes share the same address space size, like * on GFX8 and older. 
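kgd_gfx_v9_invalidate_tlbs() now picks a heavy-weight flush type (2) for Vega20 XGMI hives, prefers the KIQ packet path when the ring is ready, and otherwise flushes the matching VMID on every VM hub. A simplified stand-alone sketch of that decision flow, with stand-in types and the PASID-to-VMID lookup omitted; this is not driver code:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the relevant device state. */
struct dev_state {
        bool vega20_xgmi_hive;  /* xgmi.num_physical_nodes && CHIP_VEGA20 */
        bool kiq_ready;         /* ring->sched.ready */
        int  num_vmhubs;        /* GC hub plus one or two MMHUBs */
};

static void flush_gpu_tlb(int vmid, int vmhub, unsigned int flush_type)
{
        printf("flush vmid %d on hub %d, type %u\n", vmid, vmhub, flush_type);
}

/* Flush one VMID on every VM hub, choosing the flush type the same way the
 * patched kgd_gfx_v9_invalidate_tlbs() does. */
static void invalidate_tlbs(struct dev_state *dev, int vmid)
{
        unsigned int flush_type = dev->vega20_xgmi_hive ? 2 : 0;

        if (dev->kiq_ready) {
                /* real driver: emit PACKET3_INVALIDATE_TLBS on the KIQ ring */
                printf("KIQ invalidate, type %u\n", flush_type);
                return;
        }

        for (int hub = 0; hub < dev->num_vmhubs; hub++)
                flush_gpu_tlb(vmid, hub, flush_type);
}

int main(void)
{
        struct dev_state dev = { .vega20_xgmi_hive = true, .kiq_ready = false,
                                 .num_vmhubs = 2 };

        invalidate_tlbs(&dev, 8);
        return 0;
}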
*/ - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + if (adev->asic_type == CHIP_ARCTURUS) { + /* Two MMHUBs */ + mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); + mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); + } else + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } + +const struct kfd2kgd_calls gfx_v9_kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, + .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h new file mode 100644 index 000000000000..d9e9ad22b2bd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -0,0 +1,65 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
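With the gfx_v9 entry points exported by name, the per-ASIC glue reduces to filling in a kfd2kgd_calls table such as gfx_v9_kfd2kgd above, and KFD only ever calls through that table. A purely illustrative sketch of the pattern with tiny stand-in types (not the real kfd2kgd interface):

#include <stdint.h>
#include <stdio.h>

struct kgd_dev;         /* opaque device handle, as in the real interface */

/* Tiny stand-in for the kfd2kgd_calls vtable: one ops struct per GFX
 * generation, filled with that generation's implementations. */
struct kfd2kgd_ops {
        int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
        int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
};

static int gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        printf("gfx_v9: init interrupts for pipe %u\n", (unsigned)pipe_id);
        return 0;
}

static int gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
        printf("gfx_v9: invalidate TLBs for pasid %u\n", (unsigned)pasid);
        return 0;
}

static const struct kfd2kgd_ops gfx_v9_ops = {
        .init_interrupts = gfx_v9_init_interrupts,
        .invalidate_tlbs = gfx_v9_invalidate_tlbs,
};

int main(void)
{
        const struct kfd2kgd_ops *ops = &gfx_v9_ops;    /* chosen per ASIC */

        ops->init_interrupts(NULL, 0);
        ops->invalidate_tlbs(NULL, 0x1234);
        return 0;
}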
+ */ + + + +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1921dec3df7a..7d35b5b66229 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -19,22 +19,16 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -#define pr_fmt(fmt) "kfd2kgd: " fmt - +#include <linux/dma-buf.h> #include <linux/list.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> -#include <linux/dma-buf.h> -#include <drm/drmP.h> +#include <linux/sched/task.h> + #include "amdgpu_object.h" #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" - -/* Special VM and GART address alignment needed for VI pre-Fiji due to - * a HW bug. 
- */ -#define VI_BO_SIZE_ALIGN (0x8000) +#include "amdgpu_dma_buf.h" /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -216,14 +210,14 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct amdgpu_amdkfd_fence *ef) { - struct reservation_object *resv = bo->tbo.resv; - struct reservation_object_list *old, *new; + struct dma_resv *resv = bo->tbo.base.resv; + struct dma_resv_list *old, *new; unsigned int i, j, k; if (!ef) return -EINVAL; - old = reservation_object_get_list(resv); + old = dma_resv_get_list(resv); if (!old) return 0; @@ -239,7 +233,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct dma_fence *f; f = rcu_dereference_protected(old->shared[i], - reservation_object_held(resv)); + dma_resv_held(resv)); if (f->context == ef->base.context) RCU_INIT_POINTER(new->shared[--j], f); @@ -261,7 +255,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct dma_fence *f; f = rcu_dereference_protected(new->shared[i], - reservation_object_held(resv)); + dma_resv_held(resv)); dma_fence_put(f); } kfree_rcu(old, rcu); @@ -347,13 +341,46 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_update_directories(adev, vm); + ret = amdgpu_vm_update_pdes(adev, vm, false); if (ret) return ret; return amdgpu_sync_fence(NULL, sync, vm->last_update, false); } +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) +{ + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; + uint32_t mapping_flags; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { + if (bo_adev == adev) + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + else + mapping_flags |= AMDGPU_VM_MTYPE_UC; + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + + return amdgpu_gem_va_map_flags(adev, mapping_flags); +} + /* add_bo_to_vm - Add a BO to a VM * * Everything that needs to bo done only once when a BO is first added @@ -402,23 +429,14 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, } bo_va_entry->va = va; - bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, - mem->mapping_flags); + bo_va_entry->pte_flags = get_pte_flags(adev, mem); bo_va_entry->kgd_dev = (void *)adev; list_add(&bo_va_entry->bo_list, list_bo_va); if (p_bo_va_entry) *p_bo_va_entry = bo_va_entry; - /* Allocate new page tables if needed and validate - * them. 
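The new get_pte_flags() helper above derives VM mapping flags from the allocation flags, with an Arcturus special case: VRAM local to the mapping GPU gets a cacheable MTYPE (CC when coherent, RW otherwise), while VRAM on a remote XGMI node is mapped uncached. A stand-alone sketch of just that decision, using stand-in flag values rather than the real AMDGPU_VM_MTYPE_* bits:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in memory types; the driver ORs real AMDGPU_VM_* flags instead. */
enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };

/* Pick the memory type the way the Arcturus branch of get_pte_flags() does:
 * local VRAM is cache-coherent (CC) or read-write cached (RW); VRAM owned by
 * another GPU, and anything coherent outside VRAM, falls back to uncached. */
static enum mtype pick_mtype_arcturus(bool is_vram, bool bo_is_local,
                                      bool coherent)
{
        if (is_vram) {
                if (bo_is_local)
                        return coherent ? MTYPE_CC : MTYPE_RW;
                return MTYPE_UC;
        }
        return coherent ? MTYPE_UC : MTYPE_NC;
}

int main(void)
{
        printf("local coherent VRAM -> %d (CC)\n",
               pick_mtype_arcturus(true, true, true));
        printf("remote VRAM         -> %d (UC)\n",
               pick_mtype_arcturus(true, false, false));
        printf("plain GTT           -> %d (NC)\n",
               pick_mtype_arcturus(false, false, false));
        return 0;
}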
- */ - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); - if (ret) { - pr_err("Failed to allocate pts, err=%d\n", ret); - goto err_alloc_pts; - } - + /* Allocate validate page tables if needed */ ret = vm_validate_pt_pd_bos(vm); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); @@ -464,6 +482,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, mutex_unlock(&process_info->lock); } +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *bo_list_entry; + + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); +} + /* Initializes user pages. It registers the MMU notifier and validates * the userptr BO in the GTT domain. * @@ -476,8 +505,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, - uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -499,28 +527,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages - * should not be allocated - */ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -533,11 +545,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -596,7 +604,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -660,7 +667,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -741,13 +747,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, struct amdgpu_sync *sync) { int ret; - struct amdgpu_vm *vm; - struct amdgpu_bo_va *bo_va; - struct amdgpu_bo *bo; - - bo_va = entry->bo_va; - vm = bo_va->base.vm; - bo = bo_va->base.bo; + struct amdgpu_bo_va *bo_va = entry->bo_va; /* Update the page tables */ ret = amdgpu_vm_bo_update(adev, bo_va, false); @@ -835,8 +835,8 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info, struct amdgpu_bo *pd = peer_vm->root.base.bo; ret = 
amdgpu_sync_resv(NULL, - sync, pd->tbo.resv, - AMDGPU_FENCE_OWNER_UNDEFINED, false); + sync, pd->tbo.base.resv, + AMDGPU_FENCE_OWNER_KFD, false); if (ret) return ret; } @@ -906,9 +906,13 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; } - amdgpu_bo_sync_wait(vm->root.base.bo, AMDGPU_FENCE_OWNER_KFD, false); + ret = amdgpu_bo_sync_wait(vm->root.base.bo, + AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; + ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1); + if (ret) + goto reserve_shared_fail; amdgpu_bo_fence(vm->root.base.bo, &vm->process_info->eviction_fence->base, true); amdgpu_bo_unreserve(vm->root.base.bo); @@ -922,6 +926,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, return 0; +reserve_shared_fail: wait_pd_fail: validate_pd_fail: amdgpu_bo_unreserve(vm->root.base.bo); @@ -1097,10 +1102,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; - int byte_align; u32 domain, alloc_domain; u64 alloc_flags; - uint32_t mapping_flags; int ret; /* @@ -1108,7 +1111,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( */ if (flags & ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; @@ -1121,8 +1124,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( alloc_flags = 0; if (!offset || !*offset) return -EINVAL; - user_addr = *offset; - } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + user_addr = untagged_addr(*offset); + } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | + ALLOC_MEM_FLAGS_MMIO_REMAP)) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; bo_type = ttm_bo_type_sg; @@ -1152,24 +1156,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if ((*mem)->aql_queue) size = size >> 1; - /* Workaround for TLB bug on older VI chips */ - byte_align = (adev->family == AMDGPU_FAMILY_VI && - adev->asic_type != CHIP_FIJI && - adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11 && - adev->asic_type != CHIP_POLARIS12) ? 
- VI_BO_SIZE_ALIGN : 1; - - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (flags & ALLOC_MEM_FLAGS_WRITABLE) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & ALLOC_MEM_FLAGS_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - (*mem)->mapping_flags = mapping_flags; + (*mem)->alloc_flags = flags; amdgpu_sync_create(&(*mem)->sync); @@ -1184,7 +1171,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( memset(&bp, 0, sizeof(bp)); bp.size = size; - bp.byte_align = byte_align; + bp.byte_align = 1; bp.domain = alloc_domain; bp.flags = alloc_flags; bp.type = bo_type; @@ -1211,13 +1198,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); - if (ret) { - mutex_lock(&avm->process_info->lock); - list_del(&(*mem)->validate_list.head); - mutex_unlock(&avm->process_info->lock); + ret = init_user_pages(*mem, user_addr); + if (ret) goto allocate_init_user_pages_failed; - } } if (offset) @@ -1226,6 +1209,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return 0; allocate_init_user_pages_failed: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -1275,15 +1259,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1307,8 +1282,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); - /* If the SG is not NULL, it's one we created for a doorbell - * BO. We need to free it. + /* If the SG is not NULL, it's one we created for a doorbell or mmio + * remap BO. We need to free it. */ if (mem->bo->tbo.sg) { sg_free_table(mem->bo->tbo.sg); @@ -1422,7 +1397,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = map_bo_to_gpuvm(adev, entry, ctx.sync, is_invalid_userptr); if (ret) { - pr_err("Failed to map radeon bo to gpuvm\n"); + pr_err("Failed to map bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; } @@ -1654,9 +1629,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); - (*mem)->mapping_flags = - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + (*mem)->alloc_flags = + ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
+ ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) | + ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE; (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; @@ -1757,36 +1733,24 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - if (!mem->user_pages) { - mem->user_pages = - kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", - __func__); - return -ENOMEM; - } - } else if (mem->user_pages[0]) { - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); - } - /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { - mem->user_pages[0] = NULL; - pr_info("%s: Failed to get user pages: %d\n", + pr_debug("%s: Failed to get user pages: %d\n", __func__, ret); - /* Pretend it succeeded. It will fail later - * with a VM fault if the GPU tries to access - * it. Better than hanging indefinitely with - * stalled user mode queues. - */ + + /* Return error -EBUSY or -ENOMEM, retry restore */ + return ret; } + /* + * FIXME: Cannot ignore the return code, must hold + * notifier_lock + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + /* Mark the BO as valid unless it was invalidated - * again concurrently + * again concurrently. */ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) return -EAGAIN; @@ -1819,7 +1783,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out_no_mem; } INIT_LIST_HEAD(&resv_list); @@ -1843,7 +1808,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out; + goto out_free; amdgpu_sync_create(&sync); @@ -1859,10 +1824,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Copy pages array and validate the BO if we got user pages */ - if (mem->user_pages[0]) { - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - mem->user_pages); + /* Validate the BO if we got user pages */ + if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1871,13 +1834,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - /* Validate succeeded, now the BO owns the pages, free - * our copy of the pointer array. Put this BO back on - * the userptr_valid_list. If we need to revalidate - * it, we need to start from scratch. - */ - kvfree(mem->user_pages); - mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); @@ -1910,8 +1866,9 @@ unreserve_out: ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out: +out_free: kfree(pd_bo_list_entries); +out_no_mem: return ret; } @@ -1976,6 +1933,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * hanging. No point trying again. 
*/ } + unlock_out: mutex_unlock(&process_info->lock); mmput(mm); @@ -2143,3 +2101,92 @@ ttm_reserve_fail: kfree(pd_bo_list); return ret; } + +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) +{ + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; + int ret; + + if (!info || !gws) + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + mutex_init(&(*mem)->lock); + (*mem)->bo = amdgpu_bo_ref(gws_bo); + (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; + (*mem)->process_info = process_info; + add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + + /* Validate gws bo the first time it is added to process */ + mutex_lock(&(*mem)->process_info->lock); + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + goto bo_reservation_failure; + } + + ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); + if (ret) { + pr_err("GWS BO validate failed %d\n", ret); + goto bo_validation_failure; + } + /* GWS resource is shared b/t amdgpu and amdkfd + * Add process eviction fence to bo so they can + * evict each other. + */ + ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); + if (ret) + goto reserve_shared_fail; + amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + amdgpu_bo_unreserve(gws_bo); + mutex_unlock(&(*mem)->process_info->lock); + + return ret; + +reserve_shared_fail: +bo_validation_failure: + amdgpu_bo_unreserve(gws_bo); +bo_reservation_failure: + mutex_unlock(&(*mem)->process_info->lock); + amdgpu_sync_free(&(*mem)->sync); + remove_kgd_mem_from_kfd_bo_list(*mem, process_info); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&(*mem)->lock); + kfree(*mem); + *mem = NULL; + return ret; +} + +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) +{ + int ret; + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; + struct amdgpu_bo *gws_bo = kgd_mem->bo; + + /* Remove BO from process's validate list so restore worker won't touch + * it anymore + */ + remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); + + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + //TODO add BO back to validate_list? 
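amdgpu_amdkfd_add_gws_to_process() above follows the usual reserve/validate/reserve-fence-slot/fence sequence and unwinds it with labelled gotos in reverse order on failure (bo_reservation_failure, bo_validation_failure, reserve_shared_fail). A compact stand-alone illustration of that unwind shape, with stub steps rather than the real BO API:

#include <stdio.h>

/* Each step either succeeds (returns 0) or fails; on failure every step that
 * already succeeded is undone, newest first, via the goto labels. */
static int reserve_bo(void)          { puts("reserve");    return 0; }
static void unreserve_bo(void)       { puts("unreserve"); }
static int validate_bo(void)         { puts("validate");   return 0; }
static int reserve_fence_slot(void)  { puts("fence slot"); return -1; /* simulate failure */ }

static int add_gws(void)
{
        int ret;

        ret = reserve_bo();
        if (ret)
                goto out;
        ret = validate_bo();
        if (ret)
                goto unreserve;
        ret = reserve_fence_slot();
        if (ret)
                goto unreserve;
        /* success path: attach the eviction fence, then drop the reservation */
        unreserve_bo();
        return 0;

unreserve:
        unreserve_bo();
out:
        return ret;
}

int main(void)
{
        printf("add_gws() = %d\n", add_gws());
        return 0;
}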
+ return ret; + } + amdgpu_amdkfd_remove_eviction_fence(gws_bo, + process_info->eviction_fence); + amdgpu_bo_unreserve(gws_bo); + amdgpu_sync_free(&kgd_mem->sync); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&kgd_mem->lock); + kfree(mem); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index e02781b37e73..72232fccf61a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -23,7 +23,7 @@ * Authors: Dave Airlie * Alex Deucher */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_atombios.h" @@ -2038,6 +2038,11 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) if (adev->is_atom_fw) { amdgpu_atomfirmware_scratch_regs_init(adev); amdgpu_atomfirmware_allocate_fb_scratch(adev); + ret = amdgpu_atomfirmware_get_mem_train_fb_loc(adev); + if (ret) { + DRM_ERROR("Failed to get mem train fb location.\n"); + return ret; + } } else { amdgpu_atombios_scratch_regs_init(adev); amdgpu_atombios_allocate_fb_scratch(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index b61e1dc61b4c..ff4eb96bdfb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -20,15 +20,14 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "atom.h" #include "atombios.h" - -#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) +#include "soc15_hw_ip.h" bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) { @@ -120,38 +119,16 @@ union umc_info { union vram_info { struct atom_vram_info_header_v2_3 v23; + struct atom_vram_info_header_v2_4 v24; }; -/* - * Return vram width from integrated system info table, if available, - * or 0 if not. 
- */ -int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) -{ - struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - integratedsysteminfo); - u16 data_offset, size; - union igp_info *igp_info; - u8 frev, crev; - - /* get any igp specific overrides */ - if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, - &frev, &crev, &data_offset)) { - igp_info = (union igp_info *) - (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - return igp_info->v11.umachannelnumber * 64; - default: - return 0; - } - } - return 0; -} +union vram_module { + struct atom_vram_module_v9 v9; + struct atom_vram_module_v10 v10; +}; -static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, - int atom_mem_type) +static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, + int atom_mem_type) { int vram_type; @@ -181,6 +158,9 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, case ATOM_DGPU_VRAM_TYPE_HBM2: vram_type = AMDGPU_VRAM_TYPE_HBM; break; + case ATOM_DGPU_VRAM_TYPE_GDDR6: + vram_type = AMDGPU_VRAM_TYPE_GDDR6; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; @@ -189,19 +169,25 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, return vram_type; } -/* - * Return vram type from either integrated system info table - * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not - */ -int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) + + +int +amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + int *vram_width, int *vram_type, + int *vram_vendor) { struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index; + int index, i = 0; u16 data_offset, size; union igp_info *igp_info; union vram_info *vram_info; + union vram_module *vram_module; u8 frev, crev; u8 mem_type; + u8 mem_vendor; + u32 mem_channel_number; + u32 mem_channel_width; + u32 module_id; if (adev->flags & AMD_IS_APU) index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, @@ -209,6 +195,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) else index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { @@ -217,31 +204,137 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset); switch (crev) { case 11: + mem_channel_number = igp_info->v11.umachannelnumber; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; mem_type = igp_info->v11.memorytype; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: - return 0; + return -EINVAL; } } else { vram_info = (union vram_info *) (mode_info->atom_context->bios + data_offset); + module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16; switch (crev) { case 3: - mem_type = vram_info->v23.vram_module[0].memory_type; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (module_id > vram_info->v23.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v23.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = 
vram_module->v9.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + case 4: + if (module_id > vram_info->v24.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v24.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v10.vram_module_size); + i++; + } + mem_type = vram_module->v10.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v10.channel_num; + mem_channel_width = vram_module->v10.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; default: - return 0; + return -EINVAL; } } + } return 0; } +/* + * Return true if vbios enabled ecc by default, if umc info table is available + * or false if ecc is not enabled or umc info table is not available + */ +bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union umc_info *umc_info; + u8 frev, crev; + bool ecc_default_enabled = false; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + umc_info); + + if (amdgpu_atom_parse_data_header(mode_info->atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support umc_info 3.1+ */ + if ((frev == 3 && crev >= 1) || (frev > 3)) { + umc_info = (union umc_info *) + (mode_info->atom_context->bios + data_offset); + ecc_default_enabled = + (le32_to_cpu(umc_info->v31.umc_config) & + UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false; + } + } + + return ecc_default_enabled; +} + union firmware_info { struct atom_firmware_info_v3_1 v31; }; +/* + * Return true if vbios supports sram ecc or false if not + */ +bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union firmware_info *firmware_info; + u8 frev, crev; + bool sram_ecc_supported = false; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support firmware_info 3.1 + */ + if ((frev == 3 && crev >=1) || (frev > 3)) { + firmware_info = (union firmware_info *) + (mode_info->atom_context->bios + data_offset); + sram_ecc_supported = + (le32_to_cpu(firmware_info->v31.firmware_capability) & + ATOM_FIRMWARE_CAP_SRAM_ECC) ? 
true : false; + } + } + + return sram_ecc_supported; +} + union smu_info { struct atom_smu_info_v3_1 v31; }; @@ -346,11 +439,11 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset); switch (crev) { case 4: - adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se; - adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh; - adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se; - adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se; - adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs; + adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines; + adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches; adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; @@ -370,3 +463,138 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) } return -EINVAL; } + +/* + * Check if VBIOS supports GDDR6 training data save/restore + */ +static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev) +{ + uint16_t data_offset; + int index; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, + NULL, NULL, &data_offset)) { + struct atom_firmware_info_v3_1 *firmware_info = + (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + + data_offset); + + DRM_DEBUG("atom firmware capability:0x%08x.\n", + le32_to_cpu(firmware_info->firmware_capability)); + + if (le32_to_cpu(firmware_info->firmware_capability) & + ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) + return true; + } + + return false; +} + +static int gddr6_mem_train_support(struct amdgpu_device *adev) +{ + int ret; + uint32_t major, minor, revision, hw_v; + + if (gddr6_mem_train_vbios_support(adev)) { + amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision); + hw_v = HW_REV(major, minor, revision); + /* + * treat 0 revision as a special case since register for MP0 and MMHUB is missing + * for some Navi10 A0, preventing driver from discovering the hwip information since + * none of the functions will be initialized, it should not cause any problems + */ + switch (hw_v) { + case HW_REV(11, 0, 0): + case HW_REV(11, 0, 5): + ret = 1; + break; + default: + DRM_ERROR("memory training vbios supports but psp hw(%08x)" + " doesn't support!\n", hw_v); + ret = -1; + break; + } + } else { + ret = 0; + hw_v = -1; + } + + + DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret); + return ret; +} + +int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + unsigned char *bios = ctx->bios; + struct vram_reserve_block *reserved_block; + int index, block_number; + uint8_t frev, crev; + uint16_t data_offset, size; + uint32_t start_address_in_kb; + uint64_t offset; + int ret; + + adev->fw_vram_usage.mem_train_support = false; + + if (adev->asic_type != CHIP_NAVI10 && + adev->asic_type != CHIP_NAVI14) + return 0; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = gddr6_mem_train_support(adev); + if (ret == -1) + 
return -EINVAL; + else if (ret == 0) + return 0; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + vram_usagebyfirmware); + ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, + &data_offset); + if (ret == 0) { + DRM_ERROR("parse data header failed.\n"); + return -EINVAL; + } + + DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x," + " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset); + /* only support 2.1+ */ + if (((uint16_t)frev << 8 | crev) < 0x0201) { + DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev); + return -EINVAL; + } + + reserved_block = (struct vram_reserve_block *) + (bios + data_offset + sizeof(struct atom_common_table_header)); + block_number = ((unsigned int)size - sizeof(struct atom_common_table_header)) + / sizeof(struct vram_reserve_block); + reserved_block += (block_number > 0) ? block_number-1 : 0; + DRM_DEBUG("block_number:0x%04x, last block: 0x%08xkb sz, %dkb fw, %dkb drv.\n", + block_number, + le32_to_cpu(reserved_block->start_address_in_kb), + le16_to_cpu(reserved_block->used_by_firmware_in_kb), + le16_to_cpu(reserved_block->used_by_driver_in_kb)); + if (reserved_block->used_by_firmware_in_kb > 0) { + start_address_in_kb = le32_to_cpu(reserved_block->start_address_in_kb); + offset = (uint64_t)start_address_in_kb * ONE_KiB; + if ((offset & (ONE_MiB - 1)) < (4 * ONE_KiB + 1) ) { + offset -= ONE_MiB; + } + + offset &= ~(ONE_MiB - 1); + adev->fw_vram_usage.mem_train_fb_loc = offset; + adev->fw_vram_usage.mem_train_support = true; + DRM_DEBUG("mem_train_fb_loc:0x%09llx.\n", offset); + ret = 0; + } else { + DRM_ERROR("used_by_firmware_in_kb is 0!\n"); + ret = -EINVAL; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 20f158fd3b76..f871af5ea6f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -24,12 +24,17 @@ #ifndef __AMDGPU_ATOMFIRMWARE_H__ #define __AMDGPU_ATOMFIRMWARE_H__ +#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) + bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + int *vram_width, int *vram_type, int *vram_vendor); +int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 92b11de19581..a97fb759e2f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010 Red Hat Inc. 
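amdgpu_atomfirmware_get_mem_train_fb_loc() above turns the last reserved block's start address into a 1 MiB-aligned framebuffer offset for the GDDR6 training data: convert from KiB to bytes, step back one MiB when the start sits within the first 4 KiB of a MiB, then align down. A small stand-alone rendering of that rounding, assuming ONE_KiB/ONE_MiB values of 0x400 and 0x100000:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define ONE_KiB 0x400ULL
#define ONE_MiB 0x100000ULL

/* Mirror of the rounding done above: KiB to bytes, back off a MiB when the
 * start lies in the first 4 KiB of a MiB, then align down to 1 MiB. */
static uint64_t mem_train_fb_loc(uint32_t start_address_in_kb)
{
        uint64_t offset = (uint64_t)start_address_in_kb * ONE_KiB;

        if ((offset & (ONE_MiB - 1)) < (4 * ONE_KiB + 1))
                offset -= ONE_MiB;

        return offset & ~(ONE_MiB - 1);
}

int main(void)
{
        /* e.g. a block starting at 8 GiB + 2 KiB lands on the 1 MiB boundary
         * below 8 GiB (0x1fff00000), not at 8 GiB itself. */
        printf("0x%" PRIx64 "\n", mem_train_fb_loc(8u * 1024 * 1024 + 2));
        return 0;
}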
* Author : Dave Airlie <airlied@redhat.com> * - * Licensed under GPLv2 - * * ATPX support for both Intel/ATI */ #include <linux/vga_switcheroo.h> @@ -575,6 +574,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, @@ -613,17 +613,7 @@ static bool amdgpu_atpx_detect(void) bool d3_supported = false; struct pci_dev *parent_pdev; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { - vga_count++; - - has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); - - parent_pdev = pci_upstream_bridge(pdev); - d3_supported |= parent_pdev && parent_pdev->bridge_d3; - amdgpu_atpx_get_quirks(pdev); - } - - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { + while ((pdev = pci_get_class(PCI_BASE_CLASS_DISPLAY << 16, pdev)) != NULL) { vga_count++; has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 3079ea8523c5..d1495e1c9289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -21,7 +21,7 @@ * * Authors: Jerome Glisse */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct dma_fence *fence = NULL; + struct dma_fence *fence; int i, r; start_jiffies = jiffies; @@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, if (r) goto exit_do_move; r = dma_fence_wait(fence, false); + dma_fence_put(fence); if (r) goto exit_do_move; - dma_fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: - if (fence) - dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index a5df80d50d44..50dff69a0f6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -25,10 +25,11 @@ * Alex Deucher * Jerome Glisse */ -#include <drm/drmP.h> + #include "amdgpu.h" #include "atom.h" +#include <linux/pci.h> #include <linux/slab.h> #include <linux/acpi.h> /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 5c79da8e1150..85b0515c0fdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -28,7 +28,8 @@ * Christian König <deathsimple@vodafone.de> */ -#include <drm/drmP.h> +#include <linux/uaccess.h> + #include "amdgpu.h" #include "amdgpu_trace.h" @@ -81,9 +82,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return -ENOMEM; kref_init(&list->refcount); - list->gds_obj = adev->gds.gds_gfx_bo; - list->gws_obj = adev->gds.gws_gfx_bo; - list->oa_obj = adev->gds.oa_gfx_bo; + list->gds_obj = NULL; + list->gws_obj = NULL; + list->oa_obj = NULL; array = amdgpu_bo_list_array_entry(list, 0); memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); @@ -139,7 
+140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return 0; error_free: - while (i--) { + for (i = 0; i < last_entry; ++i) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); + + amdgpu_bo_unref(&bo); + } + for (i = first_userptr; i < num_entries; ++i) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); amdgpu_bo_unref(&bo); @@ -269,7 +275,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_create_list_entry_array(&args->in, &info); if (r) - goto error_free; + return r; switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: @@ -282,8 +288,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - amdgpu_bo_list_put(list); - return r; + goto error_put_list; } handle = r; @@ -305,9 +310,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, mutex_unlock(&fpriv->bo_list_lock); if (IS_ERR(old)) { - amdgpu_bo_list_put(list); r = PTR_ERR(old); - goto error_free; + goto error_put_list; } amdgpu_bo_list_put(old); @@ -324,8 +328,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, return 0; +error_put_list: + amdgpu_bo_list_put(list); + error_free: - if (info) - kvfree(info); + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - int user_invalidated; + bool user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 387f1cf1dc20..031b094607bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -22,8 +22,9 @@ * */ #include <linux/list.h> +#include <linux/pci.h> #include <linux/slab.h> -#include <drm/drmP.h> + #include <linux/firmware.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index bf04c12bd324..a62cbc8199de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -23,7 +23,7 @@ * Authors: Dave Airlie * Alex Deucher */ -#include <drm/drmP.h> + #include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> @@ -217,11 +217,10 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector, struct drm_encoder *encoder; const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; bool connected; - int i; best_encoder = connector_funcs->best_encoder(connector); - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if ((encoder == best_encoder) && (status == connector_status_connected)) connected = true; else @@ -236,9 +235,8 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, int encoder_type) { struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (encoder->encoder_type == encoder_type) return encoder; } @@ -347,10 +345,9 @@ static struct drm_encoder * 
amdgpu_connector_best_single_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; - int i; /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder, i) + drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; @@ -1022,8 +1019,12 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) */ if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { struct drm_connector *list_connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *list_amdgpu_connector; - list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) { + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(list_connector, + &iter) { if (connector == list_connector) continue; list_amdgpu_connector = to_amdgpu_connector(list_connector); @@ -1040,6 +1041,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) } } } + drm_connector_list_iter_end(&iter); } } } @@ -1065,9 +1067,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) /* find analog encoder */ if (amdgpu_connector->dac_load_detect) { struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (encoder->encoder_type != DRM_MODE_ENCODER_DAC && encoder->encoder_type != DRM_MODE_ENCODER_TVDAC) continue; @@ -1117,9 +1118,8 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (amdgpu_connector->use_digital == true) { if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS) return encoder; @@ -1134,7 +1134,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) /* then check use digitial */ /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder, i) + drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; @@ -1271,9 +1271,8 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn { struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { amdgpu_encoder = to_amdgpu_encoder(encoder); switch (amdgpu_encoder->encoder_id) { @@ -1292,10 +1291,9 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector) { struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - int i; bool found = false; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { amdgpu_encoder = to_amdgpu_encoder(encoder); if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2) found = true; @@ -1501,10 +1499,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct amdgpu_connector_atom_dig *amdgpu_dig_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + struct i2c_adapter *ddc = NULL; uint32_t subpixel_order = SubPixelNone; bool shared_ddc = false; bool is_dp_bridge = false; @@ -1514,10 +1514,12 @@ 
amdgpu_connector_add(struct amdgpu_device *adev, return; /* see if we already added it */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->connector_id == connector_id) { amdgpu_connector->devices |= supported_device; + drm_connector_list_iter_end(&iter); return; } if (amdgpu_connector->ddc_bus && i2c_bus->valid) { @@ -1532,6 +1534,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, } } } + drm_connector_list_iter_end(&iter); /* check if it's a dp bridge */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { @@ -1574,17 +1577,21 @@ amdgpu_connector_add(struct amdgpu_device *adev, amdgpu_connector->con_priv = amdgpu_dig_connector; if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_DVIA: default: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_dp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); connector->interlace_allowed = true; @@ -1602,8 +1609,10 @@ amdgpu_connector_add(struct amdgpu_device *adev, case DRM_MODE_CONNECTOR_HDMIA: case DRM_MODE_CONNECTOR_HDMIB: case DRM_MODE_CONNECTOR_DisplayPort: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_dp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, @@ -1644,8 +1653,10 @@ amdgpu_connector_add(struct amdgpu_device *adev, break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_edp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_edp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, @@ -1659,13 +1670,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, } else { switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("VGA: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_vga_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); amdgpu_connector->dac_load_detect = true; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.load_detect_property, @@ -1679,13 +1695,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, connector->doublescan_allowed = true; break; case DRM_MODE_CONNECTOR_DVIA: - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_vga_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); amdgpu_connector->dac_load_detect = true; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.load_detect_property, @@ -1704,13 +1725,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dvi_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); subpixel_order = SubPixelHorizontalRGB; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, @@ -1754,13 +1780,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("HDMI: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dvi_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, 1); @@ -1796,15 +1827,20 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dp_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); subpixel_order = SubPixelHorizontalRGB; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, @@ -1838,15 +1874,20 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_edp_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_edp_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); @@ -1859,13 +1900,18 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_lvds_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("LVDS: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_lvds_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52a5e4fdc95b..5ca905b4a0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -24,15 +24,18 @@ * Authors: * Jerome Glisse <glisse@freedesktop.org> */ + +#include <linux/file.h> #include <linux/pagemap.h> #include <linux/sync_file.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_gmc.h" #include "amdgpu_gem.h" +#include "amdgpu_ras.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -52,7 +55,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; - p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -215,6 +217,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: break; default: @@ -399,7 +403,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .resv = bo->tbo.resv, + .resv = bo->tbo.base.resv, .flags = 0 }; uint32_t domain; @@ -446,75 +450,12 @@ retry: return r; } -/* Last resort, try to evict something from the current working set */ -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, - struct amdgpu_bo *validated) -{ - uint32_t domain = validated->allowed_domains; - struct ttm_operation_ctx ctx = { true, false }; - int r; - - if (!p->evictable) - return false; - - for (;&p->evictable->tv.head != &p->validated; - p->evictable = list_prev_entry(p->evictable, tv.head)) { - - struct amdgpu_bo_list_entry *candidate = p->evictable; - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - bool update_bytes_moved_vis; - uint32_t other; - - /* If we reached our current BO we can forget it */ - if (bo == validated) - break; - - /* We can't move pinned BOs here */ - if (bo->pin_count) - continue; - - other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - - /* Check if this BO is in one of the domains we need space for */ - if (!(other & domain)) - continue; - - /* Check if we can move this BO somewhere else */ - other = bo->allowed_domains & ~domain; - if (!other) - continue; - - /* Good we can try to move this BO somewhere else */ - update_bytes_moved_vis = - !amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo); - amdgpu_bo_placement_from_domain(bo, other); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - p->bytes_moved += ctx.bytes_moved; - if (update_bytes_moved_vis) - p->bytes_moved_vis += ctx.bytes_moved; - - if (unlikely(r)) - break; - - p->evictable = list_prev_entry(p->evictable, tv.head); - 
list_move(&candidate->tv.head, &p->validated); - - return true; - } - - return false; -} - static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) { struct amdgpu_cs_parser *p = param; int r; - do { - r = amdgpu_cs_bo_validate(p, bo); - } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo)); + r = amdgpu_cs_bo_validate(p, bo); if (r) return r; @@ -533,37 +474,30 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); - bool binding_userptr = false; struct mm_struct *usermm; usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm && usermm != current->mm) return -EPERM; - /* Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); - binding_userptr = true; } - if (p->evictable == lobj) - p->evictable = NULL; - r = amdgpu_cs_validate(p, bo); if (r) return r; - if (binding_userptr) { - kvfree(lobj->user_pages); - lobj->user_pages = NULL; - } + kvfree(lobj->user_pages); + lobj->user_pages = NULL; } return 0; } @@ -578,7 +512,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -605,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -614,88 +545,51 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; - - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; - } - - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but somebody - * invalidated it. Free it and try again - */ - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(bo); - } + /* Get userptr backing pages. 
If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->tv.bo->ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. */ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; p->bytes_moved_vis = 0; - p->evictable = list_last_entry(&p->validated, - struct amdgpu_bo_list_entry, - tv.head); r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, amdgpu_cs_validate, p); @@ -705,16 +599,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } r = amdgpu_cs_list_validate(p, &duplicates); - if (r) { - DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n"); + if (r) goto error_validate; - } r = amdgpu_cs_list_validate(p, &p->validated); - if (r) { - DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); + if (r) goto error_validate; - } amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); @@ -755,17 +645,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); - kvfree(e->user_pages); - } - +out: return r; } @@ -776,7 +656,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - struct reservation_object *resv = bo->tbo.resv; + struct dma_resv *resv = bo->tbo.base.resv; r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, amdgpu_bo_explicit_sync(bo)); @@ -804,9 +684,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int 
error, ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - for (i = 0; i < parser->num_post_dep_syncobjs; i++) - drm_syncobj_put(parser->post_dep_syncobjs[i]); - kfree(parser->post_dep_syncobjs); + for (i = 0; i < parser->num_post_deps; i++) { + drm_syncobj_put(parser->post_deps[i].syncobj); + kfree(parser->post_deps[i].chain); + } + kfree(parser->post_deps); dma_fence_put(parser->fence); @@ -918,7 +800,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { struct dma_fence *f; bo_va = fpriv->csa_va; @@ -959,7 +841,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_update_directories(adev, vm); + r = amdgpu_vm_update_pdes(adev, vm, false); if (r) return r; @@ -1007,7 +889,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) { + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) ce_preempt++; @@ -1050,11 +933,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* UVD & VCE fw doesn't support user fences */ + /* MM engine doesn't support user fences */ ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ( - ring->funcs->type == AMDGPU_RING_TYPE_UVD || - ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + if (parser->job->uf_addr && ring->funcs->no_user_fence) return -EINVAL; return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); @@ -1089,41 +970,44 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, return r; } - fence = amdgpu_ctx_get_fence(ctx, entity, - deps[i].handle); + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + struct drm_sched_fence *s_fence; struct dma_fence *old = fence; + s_fence = to_drm_sched_fence(fence); fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(old); } - if (IS_ERR(fence)) { - r = PTR_ERR(fence); - amdgpu_ctx_put(ctx); + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + dma_fence_put(fence); + if (r) return r; - } else if (fence) { - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, - true); - dma_fence_put(fence); - amdgpu_ctx_put(ctx); - if (r) - return r; - } } return 0; } static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle) + uint32_t handle, u64 point, + u64 flags) { - int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence); - if (r) + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); return r; + } r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); dma_fence_put(fence); @@ -1134,46 +1018,123 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i, r; - struct drm_amdgpu_cs_chunk_sem *deps; deps = (struct 
drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); if (r) return r; } + return 0; } static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i; - struct drm_amdgpu_cs_chunk_sem *deps; + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); - p->post_dep_syncobjs = kmalloc_array(num_deps, - sizeof(struct drm_syncobj *), - GFP_KERNEL); - p->num_post_dep_syncobjs = 0; + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; - if (!p->post_dep_syncobjs) + if (!p->post_deps) return -ENOMEM; + for (i = 0; i < num_deps; ++i) { - p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_dep_syncobjs[i]) + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_deps[i].syncobj) return -EINVAL; - p->num_post_dep_syncobjs++; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + kfree(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + return 0; } @@ -1187,19 +1148,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, chunk = &p->chunks[i]; - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || - chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == 
AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); + if (r) + return r; + break; } } @@ -1210,8 +1185,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) { int i; - for (i = 0; i < p->num_post_dep_syncobjs; ++i) - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); + for (i = 0; i < p->num_post_deps; ++i) { + if (p->post_deps[i].chain && p->post_deps[i].point) { + drm_syncobj_add_point(p->post_deps[i].syncobj, + p->post_deps[i].chain, + p->fence, p->post_deps[i].point); + p->post_deps[i].chain = NULL; + } else { + drm_syncobj_replace_fence(p->post_deps[i].syncobj, + p->fence); + } + } } static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, @@ -1224,7 +1208,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - int r; job = p->job; @@ -1234,15 +1217,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ - amdgpu_mn_lock(p->mn); + /* No memory allocation is allowed while holding the notifier lock. + * The lock is held until amdgpu_cs_submit is finished and fence is + * added to BOs. + */ + mutex_lock(&p->adev->notifier_lock); + + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. + */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } + if (r) { + r = -EAGAIN; + goto error_abort; } job->owner = p->filp; @@ -1273,13 +1264,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: drm_sched_job_cleanup(&job->base); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); @@ -1294,6 +1285,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) bool reserved_buffers = false; int i, r; + if (amdgpu_ras_intr_triggered()) + return -EHWPOISON; + if (!adev->accel_working) return -EBUSY; @@ -1320,7 +1314,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); - else if (r != -ERESTARTSYS) + else if (r != -ERESTARTSYS && r != -EAGAIN) DRM_ERROR("Failed to process the buffer list %d!\n", r); goto out; } @@ -1338,6 +1332,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + return r; } @@ -1666,7 +1661,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, *map = mapping; /* Double check that the BO is reserved by this CS */ - if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) + if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket) return -EINVAL; if 
(!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 7e22be7ca68a..08047bc4d588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -47,6 +47,7 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo return -ENOMEM; memset(ptr, 0, size); + adev->virt.csa_cpu_addr = ptr; return 0; } @@ -92,15 +93,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - size); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; - } - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 7b526593eb77..2cdaf3b2a721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -22,10 +22,10 @@ * Authors: monk liu <monk.liu@amd.com> */ -#include <drm/drmP.h> #include <drm/drm_auth.h> #include "amdgpu.h" #include "amdgpu_sched.h" +#include "amdgpu_ras.h" #define to_amdgpu_ctx_entity(e) \ container_of((e), struct amdgpu_ctx_entity, entity) @@ -42,7 +42,7 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_JPEG] = 1, }; -static int amdgput_ctx_total_num_entities(void) +static int amdgpu_ctx_total_num_entities(void) { unsigned i, num_entities = 0; @@ -73,8 +73,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct drm_file *filp, struct amdgpu_ctx *ctx) { - unsigned num_entities = amdgput_ctx_total_num_entities(); - unsigned i, j; + unsigned num_entities = amdgpu_ctx_total_num_entities(); + unsigned i, j, k; int r; if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) @@ -123,7 +123,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; - unsigned num_rings; + unsigned num_rings = 0; unsigned num_rqs = 0; switch (i) { @@ -154,16 +154,26 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, num_rings = 1; break; case AMDGPU_HW_IP_VCN_DEC: - rings[0] = &adev->vcn.ring_dec; - num_rings = 1; + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + rings[num_rings++] = &adev->vcn.inst[j].ring_dec; + } break; case AMDGPU_HW_IP_VCN_ENC: - rings[0] = &adev->vcn.ring_enc[0]; - num_rings = 1; + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (k = 0; k < adev->vcn.num_enc_rings; ++k) + rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k]; + } break; case AMDGPU_HW_IP_VCN_JPEG: - rings[0] = &adev->vcn.ring_jpeg; - num_rings = 1; + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg; + } break; } @@ -197,7 +207,7 @@ error_free_fences: static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -279,10 
+289,7 @@ static void amdgpu_ctx_do_release(struct kref *ref) ctx = container_of(ref, struct amdgpu_ctx, refcount); - num_entities = 0; - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) - num_entities += amdgpu_ctx_num_entities[i]; - + num_entities = amdgpu_ctx_total_num_entities(); for (i = 0; i < num_entities; i++) drm_sched_entity_destroy(&ctx->entities[0][i].entity); @@ -344,6 +351,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, { struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; + unsigned long ras_counter; if (!fpriv) return -EINVAL; @@ -368,6 +376,21 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + /*query ue count*/ + ras_counter = amdgpu_ras_query_error_count(adev, false); + /*ras counter is monotonic increasing*/ + if (ras_counter != ctx->ras_counter_ue) { + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE; + ctx->ras_counter_ue = ras_counter; + } + + /*query ce count*/ + ras_counter = amdgpu_ras_query_error_count(adev, true); + if (ras_counter != ctx->ras_counter_ce) { + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE; + ctx->ras_counter_ce = ras_counter; + } + mutex_unlock(&mgr->lock); return 0; } @@ -498,7 +521,7 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); enum drm_sched_priority ctx_prio; unsigned i; @@ -518,21 +541,24 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); - unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); - struct dma_fence *other = centity->fences[idx]; + struct dma_fence *other; + unsigned idx; + long r; - if (other) { - signed long r; - r = dma_fence_wait(other, true); - if (r < 0) { - if (r != -ERESTARTSYS) - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + spin_lock(&ctx->ring_lock); + idx = centity->sequence & (amdgpu_sched_jobs - 1); + other = dma_fence_get(centity->fences[idx]); + spin_unlock(&ctx->ring_lock); - return r; - } - } + if (!other) + return 0; - return 0; + r = dma_fence_wait(other, true); + if (r < 0 && r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + + dma_fence_put(other); + return r; } void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) @@ -541,37 +567,31 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) +long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) { - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; - long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; idp = &mgr->ctx_handles; mutex_lock(&mgr->lock); idr_for_each_entry(idp, ctx, id) { - - if (!ctx->adev) { - mutex_unlock(&mgr->lock); - return; - } - for (i = 0; i < num_entities; i++) { struct drm_sched_entity *entity; entity = &ctx->entities[0][i].entity; - max_wait = drm_sched_entity_flush(entity, max_wait); + timeout = drm_sched_entity_flush(entity, timeout); } } mutex_unlock(&mgr->lock); + return timeout; } void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) { - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = 
amdgpu_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; @@ -579,17 +599,16 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) { - - if (!ctx->adev) - return; - if (kref_read(&ctx->refcount) != 1) { DRM_ERROR("ctx %p is still alive\n", ctx); continue; } - for (i = 0; i < num_entities; i++) + for (i = 0; i < num_entities; i++) { + mutex_lock(&ctx->adev->lock_reset); drm_sched_entity_fini(&ctx->entities[0][i].entity); + mutex_unlock(&ctx->adev->lock_reset); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index b3b012c0a7da..da808633732b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -49,6 +49,8 @@ struct amdgpu_ctx { enum drm_sched_priority override_priority; struct mutex lock; atomic_t guilty; + unsigned long ras_counter_ce; + unsigned long ras_counter_ue; }; struct amdgpu_ctx_mgr { @@ -82,7 +84,7 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr); +long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 4ae3ff9a1d4c..8e6726e0d035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -24,8 +24,11 @@ */ #include <linux/kthread.h> -#include <drm/drmP.h> -#include <linux/debugfs.h> +#include <linux/pci.h> +#include <linux/uaccess.h> + +#include <drm/drm_debugfs.h> + #include "amdgpu.h" /** @@ -103,10 +106,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, ssize_t result = 0; int r; bool pm_pg_lock, use_bank, use_ring; - unsigned instance_bank, sh_bank, se_bank, me, pipe, queue; + unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid; pm_pg_lock = use_bank = use_ring = false; - instance_bank = sh_bank = se_bank = me = pipe = queue = 0; + instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0; if (size & 0x3 || *pos & 0x3 || ((*pos & (1ULL << 62)) && (*pos & (1ULL << 61)))) @@ -132,6 +135,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, me = (*pos & GENMASK_ULL(33, 24)) >> 24; pipe = (*pos & GENMASK_ULL(43, 34)) >> 34; queue = (*pos & GENMASK_ULL(53, 44)) >> 44; + vmid = (*pos & GENMASK_ULL(58, 54)) >> 54; use_ring = 1; } else { @@ -149,7 +153,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, sh_bank, instance_bank); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue); + amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); } if (pm_pg_lock) @@ -182,7 +186,7 @@ end: amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -568,10 +572,9 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) - r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); - else - return -EINVAL; + 
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + if (r) + return r; if (size > valuesize) return -EINVAL; @@ -704,7 +707,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, thread = (*pos & GENMASK_ULL(59, 52)) >> 52; bank = (*pos & GENMASK_ULL(61, 60)) >> 60; - data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); + data = kcalloc(1024, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -856,6 +859,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -881,6 +887,8 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) kthread_unpark(ring->sched.thread); } + mutex_unlock(&adev->lock_reset); + return 0; } @@ -921,17 +929,198 @@ static const struct drm_info_list amdgpu_debugfs_list[] = { {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, }; +static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring, + struct dma_fence **fences) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + uint32_t sync_seq, last_seq; + + last_seq = atomic_read(&ring->fence_drv.last_seq); + sync_seq = ring->fence_drv.sync_seq; + + last_seq &= drv->num_fences_mask; + sync_seq &= drv->num_fences_mask; + + do { + struct dma_fence *fence, **ptr; + + ++last_seq; + last_seq &= drv->num_fences_mask; + ptr = &drv->fences[last_seq]; + + fence = rcu_dereference_protected(*ptr, 1); + RCU_INIT_POINTER(*ptr, NULL); + + if (!fence) + continue; + + fences[last_seq] = fence; + + } while (last_seq != sync_seq); +} + +static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences, + int length) +{ + int i; + struct dma_fence *fence; + + for (i = 0; i < length; i++) { + fence = fences[i]; + if (!fence) + continue; + dma_fence_signal(fence); + dma_fence_put(fence); + } +} + +static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job; + struct dma_fence *fence; + + spin_lock(&sched->job_list_lock); + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + fence = sched->ops->run_job(s_job); + dma_fence_put(fence); + } + spin_unlock(&sched->job_list_lock); +} + +static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring) +{ + struct amdgpu_job *job; + struct drm_sched_job *s_job; + uint32_t preempt_seq; + struct dma_fence *fence, **ptr; + struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct drm_gpu_scheduler *sched = &ring->sched; + + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) + return; + + preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2)); + if (preempt_seq <= atomic_read(&drv->last_seq)) + return; + + preempt_seq &= drv->num_fences_mask; + ptr = &drv->fences[preempt_seq]; + fence = rcu_dereference_protected(*ptr, 1); + + spin_lock(&sched->job_list_lock); + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + job = to_amdgpu_job(s_job); + if (job->fence == fence) + /* mark the job as preempted */ + job->preemption_status |= AMDGPU_IB_PREEMPTED; + } + spin_unlock(&sched->job_list_lock); +} + +static int amdgpu_debugfs_ib_preempt(void *data, u64 val) +{ + int r, resched, length; + struct amdgpu_ring *ring; + struct dma_fence **fences = NULL; + struct amdgpu_device *adev = (struct amdgpu_device *)data; + + if (val >= AMDGPU_MAX_RINGS) + return -EINVAL; + + ring = 
adev->rings[val]; + + if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) + return -EINVAL; + + /* the last preemption failed */ + if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr)) + return -EBUSY; + + length = ring->fence_drv.num_fences_mask + 1; + fences = kcalloc(length, sizeof(void *), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + + /* stop the scheduler */ + kthread_park(ring->sched.thread); + + resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + + /* preempt the IB */ + r = amdgpu_ring_preempt_ib(ring); + if (r) { + DRM_WARN("failed to preempt ring %d\n", ring->idx); + goto failure; + } + + amdgpu_fence_process(ring); + + if (atomic_read(&ring->fence_drv.last_seq) != + ring->fence_drv.sync_seq) { + DRM_INFO("ring %d was preempted\n", ring->idx); + + amdgpu_ib_preempt_mark_partial_job(ring); + + /* swap out the old fences */ + amdgpu_ib_preempt_fences_swap(ring, fences); + + amdgpu_fence_driver_force_completion(ring); + + /* resubmit unfinished jobs */ + amdgpu_ib_preempt_job_recovery(&ring->sched); + + /* wait for jobs finished */ + amdgpu_fence_wait_empty(ring); + + /* signal the old fences */ + amdgpu_ib_preempt_signal_fences(fences, length); + } + +failure: + /* restart the scheduler */ + kthread_unpark(ring->sched.thread); + + mutex_unlock(&adev->lock_reset); + + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); + + kfree(fences); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL, + amdgpu_debugfs_ib_preempt, "%llu\n"); + int amdgpu_debugfs_init(struct amdgpu_device *adev) { + adev->debugfs_preempt = + debugfs_create_file("amdgpu_preempt_ib", 0600, + adev->ddev->primary->debugfs_root, adev, + &fops_ib_preempt); + if (!(adev->debugfs_preempt)) { + DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); + return -EIO; + } + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list, ARRAY_SIZE(amdgpu_debugfs_list)); } +void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) +{ + debugfs_remove(adev->debugfs_preempt); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { return 0; } +void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { } int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 8260d8073c26..f289d28ad6b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -34,6 +34,7 @@ struct amdgpu_debugfs { int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev); +void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev); int amdgpu_debugfs_add_files(struct amdgpu_device *adev, const struct drm_info_list *files, unsigned nfiles); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ac0d646a7b74..c17505fba988 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -27,9 +27,10 @@ */ #include <linux/power_supply.h> #include <linux/kthread.h> +#include <linux/module.h> #include <linux/console.h> #include <linux/slab.h> -#include <drm/drmP.h> + #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> @@ -51,6 +52,7 @@ #endif #include "vi.h" 
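
[Editorial note, not part of the patch] The amdgpu_debugfs.c hunk above wires amdgpu_debugfs_ib_preempt() to a new debugfs file, amdgpu_preempt_ib, created with DEFINE_SIMPLE_ATTRIBUTE("%llu\n"), so userspace triggers a mid-IB preemption test by writing a decimal ring index to it. A minimal userspace sketch follows; the "dri/0" minor and ring index 0 are assumptions for the example, not values taken from the patch.

	/* Illustrative only: ask the driver to preempt ring 0 via the new
	 * debugfs entry added above.  Requires debugfs mounted and root. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/kernel/debug/dri/0/amdgpu_preempt_ib", O_WRONLY);

		if (fd < 0) {
			perror("open amdgpu_preempt_ib");
			return 1;
		}
		/* fops_ib_preempt parses a decimal u64: the ring index to preempt. */
		if (write(fd, "0", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}
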
#include "soc15.h" +#include "nv.h" #include "bif/bif_4_1_d.h" #include <linux/pci.h> #include <linux/firmware.h> @@ -60,16 +62,25 @@ #include "amdgpu_pm.h" #include "amdgpu_xgmi.h" +#include "amdgpu_ras.h" +#include "amdgpu_pmu.h" + +#include <linux/suspend.h> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -static const char *amdgpu_asic_name[] = { +const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", "VERDE", @@ -93,9 +104,36 @@ static const char *amdgpu_asic_name[] = { "VEGA12", "VEGA20", "RAVEN", + "ARCTURUS", + "RENOIR", + "NAVI10", + "NAVI14", + "NAVI12", "LAST", }; +/** + * DOC: pcie_replay_count + * + * The amdgpu driver provides a sysfs API for reporting the total number + * of PCIe replays (NAKs) + * The file pcie_replay_count is used for this and returns the total + * number of replays as a sum of the NAKs generated and NAKs received + */ + +static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); + + return snprintf(buf, PAGE_SIZE, "%llu\n", cnt); +} + +static DEVICE_ATTR(pcie_replay_count, S_IRUGO, + amdgpu_device_get_pcie_replay_count, NULL); + static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** @@ -115,6 +153,36 @@ bool amdgpu_device_is_px(struct drm_device *dev) return false; } +/** + * VRAM access helper functions. + * + * amdgpu_device_vram_access - read/write a buffer in vram + * + * @adev: amdgpu_device pointer + * @pos: offset of the buffer in vram + * @buf: virtual address of the buffer in system memory + * @size: read/write size, sizeof(@buf) must > @size + * @write: true - write to vram, otherwise - read from vram + */ +void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, + uint32_t *buf, size_t size, bool write) +{ + uint64_t last; + unsigned long flags; + + last = size - 4; + for (last += pos; pos <= last; pos += 4) { + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); + if (write) + WREG32_NO_KIQ(mmMM_DATA, *buf++); + else + *buf++ = RREG32_NO_KIQ(mmMM_DATA); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } +} + /* * MMIO register access helper functions. */ @@ -385,6 +453,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32 } /** + * amdgpu_invalid_rreg64 - dummy 64 bit reg read function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + * Returns the value in the register. 
+ */ +static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) +{ + DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); + BUG(); + return 0; +} + +/** + * amdgpu_invalid_wreg64 - dummy reg write function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * @v: value to write to the register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + */ +static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) +{ + DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", + reg, v); + BUG(); +} + +/** * amdgpu_block_invalid_rreg - dummy reg read function * * @adev: amdgpu device pointer @@ -483,7 +585,10 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, } else { tmp = RREG32(reg); tmp &= ~and_mask; - tmp |= or_mask; + if (adev->family >= AMDGPU_FAMILY_AI) + tmp |= (or_mask & and_mask); + else + tmp |= or_mask; } WREG32(reg, tmp); } @@ -909,8 +1014,10 @@ def_value: * Validates certain module parameters and updates * the associated values used by the driver (all asics). */ -static void amdgpu_device_check_arguments(struct amdgpu_device *adev) +static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int ret = 0; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -948,19 +1055,9 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); - if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || - !is_power_of_2(amdgpu_vram_page_split))) { - dev_warn(adev->dev, "invalid VRAM page split (%d)\n", - amdgpu_vram_page_split); - amdgpu_vram_page_split = 1024; - } - - if (amdgpu_lockup_timeout == 0) { - dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); - amdgpu_lockup_timeout = 10000; - } - adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + + return ret; } /** @@ -1355,6 +1452,21 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; + case CHIP_NAVI10: + chip_name = "navi10"; + break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1383,6 +1495,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + goto parse_soc_bounding_box; + adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); @@ -1401,6 +1516,29 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); + if (hdr->version_minor >= 1) { + const struct gpu_info_firmware_v1_1 *gpu_info_fw = + (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + 
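
[Editorial note, not part of the patch] The amdgpu_device_program_register_sequence() hunk above changes the read-modify-write rule for AMDGPU_FAMILY_AI and newer parts: the or_mask is first ANDed with the and_mask, so a sequence entry can never set bits outside the range it declared writable. A standalone sketch of that rule, with an invented helper name and example values, is below.

	/* Sketch of the masked-OR update rule; apply_masked_or is a made-up name. */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t apply_masked_or(uint32_t reg_val, uint32_t and_mask,
					uint32_t or_mask, bool soc15_or_newer)
	{
		uint32_t tmp = reg_val & ~and_mask;	/* clear the maskable bits */

		if (soc15_or_newer)
			tmp |= (or_mask & and_mask);	/* stay inside and_mask */
		else
			tmp |= or_mask;			/* legacy behaviour */
		return tmp;
	}

	int main(void)
	{
		/* Example values only: and_mask 0x0000ff00, or_mask 0x12345678. */
		printf("old 0x%08x -> new 0x%08x\n", 0xdeadbeefu,
		       (unsigned)apply_masked_or(0xdeadbeefu, 0x0000ff00u,
						 0x12345678u, true));
		return 0;
	}
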
adev->gfx.config.num_sc_per_sh = + le32_to_cpu(gpu_info_fw->num_sc_per_sh); + adev->gfx.config.num_packer_per_sc = + le32_to_cpu(gpu_info_fw->num_packer_per_sc); + } + +parse_soc_bounding_box: +#ifdef CONFIG_DRM_AMD_DC_DCN2_0 + /* + * soc bounding box info is not integrated in disocovery table, + * we always need to parse it from gpu info firmware. + */ + if (hdr->version_minor == 2) { + const struct gpu_info_firmware_v1_2 *gpu_info_fw = + (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; + } +#endif break; } default: @@ -1480,7 +1618,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - if (adev->asic_type == CHIP_RAVEN) + case CHIP_ARCTURUS: + case CHIP_RENOIR: + if (adev->asic_type == CHIP_RAVEN || + adev->asic_type == CHIP_RENOIR) adev->family = AMDGPU_FAMILY_RV; else adev->family = AMDGPU_FAMILY_AI; @@ -1489,6 +1630,15 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; break; + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->family = AMDGPU_FAMILY_NV; + + r = nv_set_ip_blocks(adev); + if (r) + return r; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1498,6 +1648,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + amdgpu_discovery_get_gfx_info(adev); + amdgpu_amdkfd_device_probe(adev); if (amdgpu_sriov_vf(adev)) { @@ -1506,7 +1659,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return -EAGAIN; } - adev->powerplay.pp_feature = amdgpu_pp_feature_mask; + adev->pm.pp_feature = amdgpu_pp_feature_mask; + if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { @@ -1529,6 +1684,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.valid = true; } } + /* get the vbios after the asic_funcs are set up */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* Read BIOS */ + if (!amdgpu_get_bios(adev)) + return -EINVAL; + + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } + } } adev->cg_flags &= amdgpu_cg_mask; @@ -1547,6 +1715,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) if (adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { r = adev->ip_blocks[i].version->funcs->hw_init(adev); if (r) { @@ -1586,41 +1755,41 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) { int r = 0; int i; + uint32_t smu_version; if (adev->asic_type >= CHIP_VEGA10) { for (i = 0; i < adev->num_ip_blocks; i++) { - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { - if (adev->in_gpu_reset || adev->in_suspend) { - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) - break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */ - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) { - 
DRM_ERROR("resume of IP block <%s> failed %d\n", + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) + continue; + + /* no need to do the fw loading again if already done*/ + if (adev->ip_blocks[i].status.hw == true) + break; + + if (adev->in_gpu_reset || adev->in_suspend) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); - return r; - } - } else { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); - if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } + return r; + } + } else { + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; } - adev->ip_blocks[i].status.hw = true; } - } - } - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { - r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); - if (r) { - pr_err("firmware loading failed\n"); - return r; + adev->ip_blocks[i].status.hw = true; + break; } } - return 0; + r = amdgpu_pm_load_smu_firmware(adev, &smu_version); + + return r; } /** @@ -1638,6 +1807,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) { int i, r; + r = amdgpu_ras_init(adev); + if (r) + return r; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -1669,7 +1842,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_CSA_SIZE); @@ -1681,6 +1854,13 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + r = amdgpu_ib_pool_init(adev); + if (r) { + dev_err(adev->dev, "IB initialization failed (%d).\n", r); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); + goto init_failed; + } + r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ if (r) goto init_failed; @@ -1697,6 +1877,19 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + /* + * retired pages will be loaded from eeprom and reserved here, + * it should be called after amdgpu_device_ip_hw_init_phase2 since + * for some ASICs the RAS EEPROM code relies on SMU fully functioning + * for I2C communication which only true at this point. + * recovery_init may fail, but it can free all resources allocated by + * itself and its failure should not stop amdgpu init process. 
+ * + * Note: theoretically, this should be called before all vram allocations + * to protect retired page from abusing + */ + amdgpu_ras_recovery_init(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); @@ -1813,6 +2006,43 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power return 0; } +static int amdgpu_device_enable_mgpu_fan_boost(void) +{ + struct amdgpu_gpu_instance *gpu_ins; + struct amdgpu_device *adev; + int i, ret = 0; + + mutex_lock(&mgpu_info.mutex); + + /* + * MGPU fan boost feature should be enabled + * only when there are two or more dGPUs in + * the system + */ + if (mgpu_info.num_dgpu < 2) + goto out; + + for (i = 0; i < mgpu_info.num_dgpu; i++) { + gpu_ins = &(mgpu_info.gpu_ins[i]); + adev = gpu_ins->adev; + if (!(adev->flags & AMD_IS_APU) && + !gpu_ins->mgpu_fan_enabled && + adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { + ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); + if (ret) + break; + + gpu_ins->mgpu_fan_enabled = 1; + } + } + +out: + mutex_unlock(&mgpu_info.mutex); + + return ret; +} + /** * amdgpu_device_ip_late_init - run late init for hardware IPs * @@ -1827,6 +2057,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power */ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { + struct amdgpu_gpu_instance *gpu_instance; int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1846,11 +2077,46 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); - queue_delayed_work(system_wq, &adev->late_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); - amdgpu_device_fill_reset_magic(adev); + r = amdgpu_device_enable_mgpu_fan_boost(); + if (r) + DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); + + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + mutex_lock(&mgpu_info.mutex); + + /* + * Reset device p-state to low as this was booted with high. + * + * This should be performed only after all devices from the same + * hive get initialized. + * + * However, it's unknown how many device in the hive in advance. + * As this is counted one by one during devices initializations. + * + * So, we wait for all XGMI interlinked devices initialized. + * This may bring some delays as those devices may come from + * different hives. But that should be OK. 
+ */ + if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { + for (i = 0; i < mgpu_info.num_gpu; i++) { + gpu_instance = &(mgpu_info.gpu_ins[i]); + if (gpu_instance->adev->flags & AMD_IS_APU) + continue; + + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); + if (r) { + DRM_ERROR("pstate setting failed (%d).\n", r); + break; + } + } + } + + mutex_unlock(&mgpu_info.mutex); + } + return 0; } @@ -1869,6 +2135,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + amdgpu_ras_pre_fini(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); @@ -1917,6 +2185,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_free_static_csa(&adev->virt.csa_obj); amdgpu_device_wb_fini(adev); amdgpu_device_vram_scratch_fini(adev); + amdgpu_ib_pool_fini(adev); } r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); @@ -1937,6 +2206,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = false; } + amdgpu_ras_fini(adev); + if (amdgpu_sriov_vf(adev)) if (amdgpu_virt_release_full_gpu(adev, false)) DRM_ERROR("failed to release exclusive mode on fini\n"); @@ -1944,61 +2215,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) return 0; } -static int amdgpu_device_enable_mgpu_fan_boost(void) -{ - struct amdgpu_gpu_instance *gpu_ins; - struct amdgpu_device *adev; - int i, ret = 0; - - mutex_lock(&mgpu_info.mutex); - - /* - * MGPU fan boost feature should be enabled - * only when there are two or more dGPUs in - * the system - */ - if (mgpu_info.num_dgpu < 2) - goto out; - - for (i = 0; i < mgpu_info.num_dgpu; i++) { - gpu_ins = &(mgpu_info.gpu_ins[i]); - adev = gpu_ins->adev; - if (!(adev->flags & AMD_IS_APU) && - !gpu_ins->mgpu_fan_enabled && - adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { - ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); - if (ret) - break; - - gpu_ins->mgpu_fan_enabled = 1; - } - } - -out: - mutex_unlock(&mgpu_info.mutex); - - return ret; -} - /** - * amdgpu_device_ip_late_init_func_handler - work handler for ib test + * amdgpu_device_delayed_init_work_handler - work handler for IB tests * * @work: work_struct. 
*/ -static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) +static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) { struct amdgpu_device *adev = - container_of(work, struct amdgpu_device, late_init_work.work); + container_of(work, struct amdgpu_device, delayed_init_work.work); int r; r = amdgpu_ib_ring_tests(adev); if (r) DRM_ERROR("ib ring test failed (%d).\n", r); - - r = amdgpu_device_enable_mgpu_fan_boost(); - if (r) - DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); } static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) @@ -2043,7 +2273,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) if (r) { DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); + return r; } + adev->ip_blocks[i].status.hw = false; } } @@ -2071,6 +2303,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* displays are handled in phase1 */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) continue; + /* PSP lost connection when err_event_athub occurs */ + if (amdgpu_ras_intr_triggered() && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { + adev->ip_blocks[i].status.hw = false; + continue; + } /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ @@ -2078,6 +2316,25 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); } + adev->ip_blocks[i].status.hw = false; + /* handle putting the SMC in the appropriate state */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + if (is_support_sw_smu(adev)) { + r = smu_set_mp1_state(&adev->smu, adev->mp1_state); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + r = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + adev->mp1_state); + } + if (r) { + DRM_ERROR("SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); + return r; + } + } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2130,6 +2387,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; + block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; @@ -2138,6 +2396,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2165,13 +2424,15 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) block = &adev->ip_blocks[j]; if (block->version->type != ip_order[i] || - !block->status.valid) + !block->status.valid || + block->status.hw) continue; r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2195,17 +2456,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) 
{ + r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } } @@ -2230,7 +2493,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || @@ -2243,6 +2506,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } return 0; @@ -2339,6 +2603,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: #endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_0) + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: +#endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + case CHIP_RENOIR: +#endif return amdgpu_dc != 0; #endif default: @@ -2369,10 +2641,77 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) adev->asic_reset_res = amdgpu_asic_reset(adev); if (adev->asic_reset_res) - DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev->ddev->unique); } +static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. + * In SR-IOV or passthrough mode, timeout for compute + * jobs are 10000 by default. + */ + adev->gfx_timeout = msecs_to_jiffies(10000); + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + adev->compute_timeout = adev->gfx_timeout; + else + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + if (timeout == 0) { + index++; + continue; + } else if (timeout < 0) { + timeout = MAX_SCHEDULE_TIMEOUT; + } else { + timeout = msecs_to_jiffies(timeout); + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. 
+ */ + if (index == 1) { + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + adev->compute_timeout = adev->gfx_timeout; + } + } + + return ret; +} /** * amdgpu_device_init - initialize the driver @@ -2400,7 +2739,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ddev = ddev; adev->pdev = pdev; adev->flags = flags; - adev->asic_type = flags & AMD_ASIC_MASK; + + if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) + adev->asic_type = amdgpu_force_asic_type; + else + adev->asic_type = flags & AMD_ASIC_MASK; + adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; if (amdgpu_emu_mode == 1) adev->usec_timeout *= 2; @@ -2421,6 +2765,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->pcie_wreg = &amdgpu_invalid_wreg; adev->pciep_rreg = &amdgpu_invalid_rreg; adev->pciep_wreg = &amdgpu_invalid_wreg; + adev->pcie_rreg64 = &amdgpu_invalid_rreg64; + adev->pcie_wreg64 = &amdgpu_invalid_wreg64; adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; adev->didt_rreg = &amdgpu_invalid_rreg; @@ -2448,8 +2794,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); + mutex_init(&adev->notifier_lock); + mutex_init(&adev->virt.dpm_mutex); + mutex_init(&adev->psp.mutex); - amdgpu_device_check_arguments(adev); + r = amdgpu_device_check_arguments(adev); + if (r) + return r; spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); @@ -2467,8 +2818,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->ring_lru_list); spin_lock_init(&adev->ring_lru_list_lock); - INIT_DELAYED_WORK(&adev->late_init_work, - amdgpu_device_ip_late_init_func_handler); + INIT_DELAYED_WORK(&adev->delayed_init_work, + amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); @@ -2505,13 +2856,44 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->rio_mem == NULL) DRM_INFO("PCI I/O BAR is not found.\n"); + /* enable PCIE atomic ops */ + r = pci_enable_atomic_ops_to_root(adev->pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (r) { + adev->have_atomics_support = false; + DRM_INFO("PCIE atomic ops is not supported\n"); + } else { + adev->have_atomics_support = true; + } + amdgpu_device_get_pcie_info(adev); + if (amdgpu_mcbp) + DRM_INFO("MCBP is enabled\n"); + + if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) + adev->enable_mes = true; + + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) { + r = amdgpu_discovery_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_discovery_init failed\n"); + return r; + } + } + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; + r = amdgpu_device_get_job_timeout_settings(adev); + if (r) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return r; + } + /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -2534,19 +2916,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto fence_driver_init; } - /* Read BIOS */ - if (!amdgpu_get_bios(adev)) { - r = -EINVAL; - goto failed; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - goto failed; - } - /* detect if we are with an SRIOV vbios */ 
amdgpu_device_detect_sriov_bios(adev); @@ -2642,19 +3011,19 @@ fence_driver_init: /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - r = amdgpu_ib_pool_init(adev); - if (r) { - dev_err(adev->dev, "IB initialization failed (%d).\n", r); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); - goto failed; - } - amdgpu_fbdev_init(adev); + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) + amdgpu_pm_virt_sysfs_init(adev); + r = amdgpu_pm_sysfs_init(adev); if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); + r = amdgpu_ucode_sysfs_init(adev); + if (r) + DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + r = amdgpu_debugfs_gem_init(adev); if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2684,6 +3053,13 @@ fence_driver_init: DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); } + /* + * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. + * Otherwise the mgpu fan boost feature will be skipped due to the + * gpu instance is counted less. + */ + amdgpu_register_gpu_instance(adev); + /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ @@ -2694,6 +3070,23 @@ fence_driver_init: goto failed; } + /* must succeed. */ + amdgpu_ras_resume(adev); + + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); + + r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); + if (r) { + dev_err(adev->dev, "Could not create pcie_replay_count"); + return r; + } + + if (IS_ENABLED(CONFIG_PERF_EVENTS)) + r = amdgpu_pmu_init(adev); + if (r) + dev_err(adev->dev, "amdgpu_pmu_init failed\n"); + return 0; failed: @@ -2717,7 +3110,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int r; DRM_INFO("amdgpu: finishing device.\n"); + flush_delayed_work(&adev->delayed_init_work); adev->shutdown = true; + /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -2726,7 +3121,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) else drm_atomic_helper_shutdown(adev->ddev); } - amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); @@ -2736,7 +3130,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->late_init_work); /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); @@ -2757,7 +3150,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) + amdgpu_pm_virt_sysfs_fini(adev); + amdgpu_debugfs_regs_cleanup(adev); + device_remove_file(adev->dev, &dev_attr_pcie_replay_count); + amdgpu_ucode_sysfs_fini(adev); + if (IS_ENABLED(CONFIG_PERF_EVENTS)) + amdgpu_pmu_fini(adev); + amdgpu_debugfs_preempt_cleanup(adev); + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + amdgpu_discovery_fini(adev); } @@ -2780,6 +3183,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) struct amdgpu_device *adev; struct drm_crtc *crtc; struct drm_connector *connector; + struct drm_connector_list_iter iter; int r; if (dev == NULL || dev->dev_private == NULL) { @@ -2797,14 +3201,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (fbcon) amdgpu_fbdev_set_suspend(adev, 
1); - cancel_delayed_work_sync(&adev->late_init_work); + cancel_delayed_work_sync(&adev->delayed_init_work); if (!amdgpu_device_has_dc_support(adev)) { /* turn off display hw */ drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + drm_helper_connector_dpms(connector, + DRM_MODE_DPMS_OFF); + drm_connector_list_iter_end(&iter); drm_modeset_unlock_all(dev); /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -2838,6 +3244,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) amdgpu_amdkfd_suspend(adev); + amdgpu_ras_suspend(adev); + r = amdgpu_device_ip_suspend_phase1(adev); /* evict vram memory */ @@ -2853,15 +3261,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - pci_save_state(dev->pdev); if (suspend) { + pci_save_state(dev->pdev); /* Shut down the device */ pci_disable_device(dev->pdev); pci_set_power_state(dev->pdev, PCI_D3hot); - } else { - r = amdgpu_asic_reset(adev); - if (r) - DRM_ERROR("amdgpu asic reset failed\n"); } return 0; @@ -2881,6 +3285,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_device *adev = dev->dev_private; struct drm_crtc *crtc; int r = 0; @@ -2915,6 +3320,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (r) return r; + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); + if (!amdgpu_device_has_dc_support(adev)) { /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -2938,7 +3346,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) return r; /* Make sure IB tests flushed */ - flush_delayed_work(&adev->late_init_work); + flush_delayed_work(&adev->delayed_init_work); /* blat the mode back in */ if (fbcon) { @@ -2948,9 +3356,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) /* turn on display hw */ drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + drm_helper_connector_dpms(connector, + DRM_MODE_DPMS_ON); + drm_connector_list_iter_end(&iter); + drm_modeset_unlock_all(dev); } amdgpu_fbdev_set_suspend(adev, 0); @@ -2958,6 +3370,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_kms_helper_poll_enable(dev); + amdgpu_ras_resume(adev); + /* * Most of the connector probing functions try to acquire runtime pm * refs to ensure that the GPU is powered on when connector polling is @@ -3165,6 +3579,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) /* No need to recover an evicted BO */ if (shadow->tbo.mem.mem_type != TTM_PL_TT || + shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) continue; @@ -3173,11 +3588,16 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) break; if (fence) { - r = dma_fence_wait_timeout(fence, false, tmo); + tmo = 
dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); fence = next; - if (r <= 0) + if (tmo == 0) { + r = -ETIMEDOUT; break; + } else if (tmo < 0) { + r = tmo; + break; + } } else { fence = next; } @@ -3188,8 +3608,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); - if (r <= 0 || tmo <= 0) { - DRM_ERROR("recover vram bo from shadow failed\n"); + if (r < 0 || tmo <= 0) { + DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); return -EIO; } @@ -3219,6 +3639,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; + amdgpu_amdkfd_pre_reset(adev); + /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) @@ -3238,12 +3660,13 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); + amdgpu_amdkfd_post_reset(adev); error: amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - atomic_inc(&adev->vram_lost_counter); + amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } @@ -3285,6 +3708,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA20: case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_RAVEN: break; default: goto disabled; @@ -3313,8 +3737,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; - drm_sched_stop(&ring->sched); - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } @@ -3322,8 +3744,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if(job) drm_sched_increase_karma(&job->base); - - + /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ if (!amdgpu_sriov_vf(adev)) { if (!need_full_reset) @@ -3370,7 +3791,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, r = amdgpu_asic_reset(tmp_adev); if (r) { - DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", + DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", r, tmp_adev->ddev->unique); break; } @@ -3405,8 +3826,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { - DRM_ERROR("VRAM is lost!\n"); - atomic_inc(&tmp_adev->vram_lost_counter); + DRM_INFO("VRAM is lost due to GPU reset!\n"); + amdgpu_inc_vram_lost(tmp_adev); } r = amdgpu_gtt_mgr_recover( @@ -3425,6 +3846,19 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); + /* + * Add this ASIC as tracked as reset was already + * complete successfully. + */ + amdgpu_register_gpu_instance(tmp_adev); + + r = amdgpu_device_ip_late_init(tmp_adev); + if (r) + goto out; + + /* must succeed. 
*/ + amdgpu_ras_resume(tmp_adev); + /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, tmp_adev); @@ -3456,51 +3890,39 @@ end: return r; } -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, - struct amdgpu_job *job) +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) { - int i; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - - if (!adev->asic_reset_res) - drm_sched_resubmit_jobs(&ring->sched); + if (trylock) { + if (!mutex_trylock(&adev->lock_reset)) + return false; + } else + mutex_lock(&adev->lock_reset); - drm_sched_start(&ring->sched, !adev->asic_reset_res); - } - - if (!amdgpu_device_has_dc_support(adev)) { - drm_helper_resume_force_mode(adev->ddev); - } - - adev->asic_reset_res = 0; -} - -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) -{ - mutex_lock(&adev->lock_reset); atomic_inc(&adev->gpu_reset_counter); adev->in_gpu_reset = 1; - /* Block kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_pre_reset(adev); + switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_MODE1: + adev->mp1_state = PP_MP1_STATE_SHUTDOWN; + break; + case AMD_RESET_METHOD_MODE2: + adev->mp1_state = PP_MP1_STATE_RESET; + break; + default: + adev->mp1_state = PP_MP1_STATE_NONE; + break; + } + + return true; } static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { - /*unlock kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); + adev->mp1_state = PP_MP1_STATE_NONE; adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); } - /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -3515,41 +3937,65 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) { - int r; + struct list_head device_list, *device_list_handle = NULL; + bool need_full_reset, job_signaled; struct amdgpu_hive_info *hive = NULL; - bool need_full_reset = false; struct amdgpu_device *tmp_adev = NULL; - struct list_head device_list, *device_list_handle = NULL; + int i, r = 0; + bool in_ras_intr = amdgpu_ras_intr_triggered(); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { + + DRM_WARN("Emergency reboot."); + ksys_sync_helper(); + emergency_restart(); + } + + need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU reset begin!\n"); + dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); + + cancel_delayed_work_sync(&adev->delayed_init_work); + + hive = amdgpu_get_xgmi_hive(adev, false); /* - * In case of XGMI hive disallow concurrent resets to be triggered - * by different nodes. No point also since the one node already executing - * reset will also reset all the other nodes in the hive. + * Here we trylock to avoid chain of resets executing from + * either trigger by jobs on different adevs in XGMI hive or jobs on + * different schedulers for same device while this TO handler is running. + * We always reset all schedulers for device and all devices for XGMI + * hive so that should take care of them too. 
*/ - hive = amdgpu_get_xgmi_hive(adev, 0); - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && - !mutex_trylock(&hive->reset_lock)) + + if (hive && !mutex_trylock(&hive->reset_lock)) { + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", + job ? job->base.id : -1, hive->hive_id); return 0; + } /* Start with adev pre asic reset first for soft reset check.*/ - amdgpu_device_lock_adev(adev); - r = amdgpu_device_pre_asic_reset(adev, - job, - &need_full_reset); - if (r) { - /*TODO Should we stop ?*/ - DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", - r, adev->ddev->unique); - adev->asic_reset_res = r; + if (!amdgpu_device_lock_adev(adev, !hive)) { + DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", + job ? job->base.id : -1); + return 0; } + /* Block kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_pre_reset(adev); + /* Build list of devices to reset */ - if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) { + if (adev->gmc.xgmi.num_physical_nodes > 1) { if (!hive) { + /*unlock kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_post_reset(adev); amdgpu_device_unlock_adev(adev); return -ENODEV; } @@ -3565,13 +4011,72 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + if (tmp_adev != adev) { + amdgpu_device_lock_adev(tmp_adev, false); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); + } + + /* + * Mark these ASICs to be reseted as untracked first + * And add them back after reset completed + */ + amdgpu_unregister_gpu_instance(tmp_adev); + + /* disable ras on ALL IPs */ + if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) + amdgpu_ras_suspend(tmp_adev); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + + drm_sched_stop(&ring->sched, job ? &job->base : NULL); + + if (in_ras_intr) + amdgpu_job_stop_all_jobs_on_sched(&ring->sched); + } + } + + + if (in_ras_intr) + goto skip_sched_resume; + + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. + * + * job->base holds a reference to parent fence + */ + if (job && job->base.s_fence->parent && + dma_fence_is_signaled(job->base.s_fence->parent)) + job_signaled = true; + + if (job_signaled) { + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; + } + + + /* Guilty job will be freed after this*/ + r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset); + if (r) { + /*TODO Should we stop ?*/ + DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", + r, adev->ddev->unique); + adev->asic_reset_res = r; + } + retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (tmp_adev == adev) continue; - amdgpu_device_lock_adev(tmp_adev); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, &need_full_reset); @@ -3595,22 +4100,48 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ goto retry; } +skip_hw_reset: + /* Post ASIC reset for all devs .*/ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? 
job : NULL); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + + /* No point to resubmit jobs if we didn't HW reset*/ + if (!tmp_adev->asic_reset_res && !job_signaled) + drm_sched_resubmit_jobs(&ring->sched); + + drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); + } + + if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { + drm_helper_resume_force_mode(tmp_adev->ddev); + } + + tmp_adev->asic_reset_res = 0; if (r) { /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); } + } +skip_sched_resume: + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /*unlock kfd: SRIOV would do it separately */ + if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) + if (hive) mutex_unlock(&hive->reset_lock); if (r) @@ -3618,43 +4149,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ return r; } -static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, - enum pci_bus_speed *speed, - enum pcie_link_width *width) -{ - struct pci_dev *pdev = adev->pdev; - enum pci_bus_speed cur_speed; - enum pcie_link_width cur_width; - u32 ret = 1; - - *speed = PCI_SPEED_UNKNOWN; - *width = PCIE_LNK_WIDTH_UNKNOWN; - - while (pdev) { - cur_speed = pcie_get_speed_cap(pdev); - cur_width = pcie_get_width_cap(pdev); - ret = pcie_bandwidth_available(adev->pdev, NULL, - NULL, &cur_width); - if (!ret) - cur_width = PCIE_LNK_WIDTH_RESRV; - - if (cur_speed != PCI_SPEED_UNKNOWN) { - if (*speed == PCI_SPEED_UNKNOWN) - *speed = cur_speed; - else if (cur_speed < *speed) - *speed = cur_speed; - } - - if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { - if (*width == PCIE_LNK_WIDTH_UNKNOWN) - *width = cur_width; - else if (cur_width < *width) - *width = cur_width; - } - pdev = pci_upstream_bridge(pdev); - } -} - /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3688,8 +4182,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, - &platform_link_width); + pcie_bandwidth_available(adev->pdev, NULL, + &platform_speed_cap, &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c new file mode 100644 index 000000000000..f95092741c38 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -0,0 +1,407 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_discovery.h" +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "nbio/nbio_2_3_offset.h" +#include "discovery.h" + +#define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMM_INDEX 0x0 +#define mmMM_INDEX_HI 0x6 +#define mmMM_DATA 0x1 +#define HW_ID_MAX 300 + +const char *hw_id_names[HW_ID_MAX] = { + [MP1_HWID] = "MP1", + [MP2_HWID] = "MP2", + [THM_HWID] = "THM", + [SMUIO_HWID] = "SMUIO", + [FUSE_HWID] = "FUSE", + [CLKA_HWID] = "CLKA", + [PWR_HWID] = "PWR", + [GC_HWID] = "GC", + [UVD_HWID] = "UVD", + [AUDIO_AZ_HWID] = "AUDIO_AZ", + [ACP_HWID] = "ACP", + [DCI_HWID] = "DCI", + [DMU_HWID] = "DMU", + [DCO_HWID] = "DCO", + [DIO_HWID] = "DIO", + [XDMA_HWID] = "XDMA", + [DCEAZ_HWID] = "DCEAZ", + [DAZ_HWID] = "DAZ", + [SDPMUX_HWID] = "SDPMUX", + [NTB_HWID] = "NTB", + [IOHC_HWID] = "IOHC", + [L2IMU_HWID] = "L2IMU", + [VCE_HWID] = "VCE", + [MMHUB_HWID] = "MMHUB", + [ATHUB_HWID] = "ATHUB", + [DBGU_NBIO_HWID] = "DBGU_NBIO", + [DFX_HWID] = "DFX", + [DBGU0_HWID] = "DBGU0", + [DBGU1_HWID] = "DBGU1", + [OSSSYS_HWID] = "OSSSYS", + [HDP_HWID] = "HDP", + [SDMA0_HWID] = "SDMA0", + [SDMA1_HWID] = "SDMA1", + [ISP_HWID] = "ISP", + [DBGU_IO_HWID] = "DBGU_IO", + [DF_HWID] = "DF", + [CLKB_HWID] = "CLKB", + [FCH_HWID] = "FCH", + [DFX_DAP_HWID] = "DFX_DAP", + [L1IMU_PCIE_HWID] = "L1IMU_PCIE", + [L1IMU_NBIF_HWID] = "L1IMU_NBIF", + [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR", + [L1IMU3_HWID] = "L1IMU3", + [L1IMU4_HWID] = "L1IMU4", + [L1IMU5_HWID] = "L1IMU5", + [L1IMU6_HWID] = "L1IMU6", + [L1IMU7_HWID] = "L1IMU7", + [L1IMU8_HWID] = "L1IMU8", + [L1IMU9_HWID] = "L1IMU9", + [L1IMU10_HWID] = "L1IMU10", + [L1IMU11_HWID] = "L1IMU11", + [L1IMU12_HWID] = "L1IMU12", + [L1IMU13_HWID] = "L1IMU13", + [L1IMU14_HWID] = "L1IMU14", + [L1IMU15_HWID] = "L1IMU15", + [WAFLC_HWID] = "WAFLC", + [FCH_USB_PD_HWID] = "FCH_USB_PD", + [PCIE_HWID] = "PCIE", + [PCS_HWID] = "PCS", + [DDCL_HWID] = "DDCL", + [SST_HWID] = "SST", + [IOAGR_HWID] = "IOAGR", + [NBIF_HWID] = "NBIF", + [IOAPIC_HWID] = "IOAPIC", + [SYSTEMHUB_HWID] = "SYSTEMHUB", + [NTBCCP_HWID] = "NTBCCP", + [UMC_HWID] = "UMC", + [SATA_HWID] = "SATA", + [USB_HWID] = "USB", + [CCXSEC_HWID] = "CCXSEC", + [XGMI_HWID] = "XGMI", + [XGBE_HWID] = "XGBE", + [MP0_HWID] = "MP0", +}; + +static int hw_id_map[MAX_HWIP] = { + [GC_HWIP] = GC_HWID, + [HDP_HWIP] = HDP_HWID, + [SDMA0_HWIP] = SDMA0_HWID, + [SDMA1_HWIP] = SDMA1_HWID, + [MMHUB_HWIP] = MMHUB_HWID, + [ATHUB_HWIP] = ATHUB_HWID, + 
[NBIO_HWIP] = NBIF_HWID, + [MP0_HWIP] = MP0_HWID, + [MP1_HWIP] = MP1_HWID, + [UVD_HWIP] = UVD_HWID, + [VCE_HWIP] = VCE_HWID, + [DF_HWIP] = DF_HWID, + [DCE_HWIP] = DMU_HWID, + [OSSSYS_HWIP] = OSSSYS_HWID, + [SMUIO_HWIP] = SMUIO_HWID, + [PWR_HWIP] = PWR_HWID, + [NBIF_HWIP] = NBIF_HWID, + [THM_HWIP] = THM_HWID, + [CLK_HWIP] = CLKA_HWID, +}; + +static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) +{ + uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; + uint64_t pos = vram_size - DISCOVERY_TMR_SIZE; + + amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false); + return 0; +} + +static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size) +{ + uint16_t checksum = 0; + int i; + + for (i = 0; i < size; i++) + checksum += data[i]; + + return checksum; +} + +static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size, + uint16_t expected) +{ + return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); +} + +int amdgpu_discovery_init(struct amdgpu_device *adev) +{ + struct table_info *info; + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct gpu_info_header *ghdr; + uint16_t offset; + uint16_t size; + uint16_t checksum; + int r; + + adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL); + if (!adev->discovery) + return -ENOMEM; + + r = amdgpu_discovery_read_binary(adev, adev->discovery); + if (r) { + DRM_ERROR("failed to read ip discovery binary\n"); + goto out; + } + + bhdr = (struct binary_header *)adev->discovery; + + if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { + DRM_ERROR("invalid ip discovery binary signature\n"); + r = -EINVAL; + goto out; + } + + offset = offsetof(struct binary_header, binary_checksum) + + sizeof(bhdr->binary_checksum); + size = bhdr->binary_size - offset; + checksum = bhdr->binary_checksum; + + if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + size, checksum)) { + DRM_ERROR("invalid ip discovery binary checksum\n"); + r = -EINVAL; + goto out; + } + + info = &bhdr->table_list[IP_DISCOVERY]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + ihdr = (struct ip_discovery_header *)(adev->discovery + offset); + + if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { + DRM_ERROR("invalid ip discovery data table signature\n"); + r = -EINVAL; + goto out; + } + + if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + ihdr->size, checksum)) { + DRM_ERROR("invalid ip discovery data table checksum\n"); + r = -EINVAL; + goto out; + } + + info = &bhdr->table_list[GC]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + ghdr = (struct gpu_info_header *)(adev->discovery + offset); + + if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + ghdr->size, checksum)) { + DRM_ERROR("invalid gc data table checksum\n"); + r = -EINVAL; + goto out; + } + + return 0; + +out: + kfree(adev->discovery); + adev->discovery = NULL; + + return r; +} + +void amdgpu_discovery_fini(struct amdgpu_device *adev) +{ + kfree(adev->discovery); + adev->discovery = NULL; +} + +int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct die_header *dhdr; + struct ip *ip; + uint16_t die_offset; + uint16_t ip_offset; + uint16_t num_dies; + uint16_t num_ips; + uint8_t num_base_address; + int hw_ip; + int i, j, k; + + if (!adev->discovery) { + DRM_ERROR("ip 
discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->discovery; + ihdr = (struct ip_discovery_header *)(adev->discovery + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + num_dies = le16_to_cpu(ihdr->num_dies); + + DRM_DEBUG("number of dies: %d\n", num_dies); + + for (i = 0; i < num_dies; i++) { + die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); + dhdr = (struct die_header *)(adev->discovery + die_offset); + num_ips = le16_to_cpu(dhdr->num_ips); + ip_offset = die_offset + sizeof(*dhdr); + + if (le16_to_cpu(dhdr->die_id) != i) { + DRM_ERROR("invalid die id %d, expected %d\n", + le16_to_cpu(dhdr->die_id), i); + return -EINVAL; + } + + DRM_DEBUG("number of hardware IPs on die%d: %d\n", + le16_to_cpu(dhdr->die_id), num_ips); + + for (j = 0; j < num_ips; j++) { + ip = (struct ip *)(adev->discovery + ip_offset); + num_base_address = ip->num_base_address; + + DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", + hw_id_names[le16_to_cpu(ip->hw_id)], + le16_to_cpu(ip->hw_id), + ip->number_instance, + ip->major, ip->minor, + ip->revision); + + for (k = 0; k < num_base_address; k++) { + /* + * convert the endianness of base addresses in place, + * so that we don't need to convert them when accessing adev->reg_offset. + */ + ip->base_address[k] = le32_to_cpu(ip->base_address[k]); + DRM_DEBUG("\t0x%08x\n", ip->base_address[k]); + } + + for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) { + if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) { + DRM_INFO("set register base offset for %s\n", + hw_id_names[le16_to_cpu(ip->hw_id)]); + adev->reg_offset[hw_ip][ip->number_instance] = + ip->base_address; + } + + } + + ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + } + } + + return 0; +} + +int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, + int *major, int *minor, int *revision) +{ + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct die_header *dhdr; + struct ip *ip; + uint16_t die_offset; + uint16_t ip_offset; + uint16_t num_dies; + uint16_t num_ips; + int i, j; + + if (!adev->discovery) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->discovery; + ihdr = (struct ip_discovery_header *)(adev->discovery + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + num_dies = le16_to_cpu(ihdr->num_dies); + + for (i = 0; i < num_dies; i++) { + die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); + dhdr = (struct die_header *)(adev->discovery + die_offset); + num_ips = le16_to_cpu(dhdr->num_ips); + ip_offset = die_offset + sizeof(*dhdr); + + for (j = 0; j < num_ips; j++) { + ip = (struct ip *)(adev->discovery + ip_offset); + + if (le16_to_cpu(ip->hw_id) == hw_id) { + if (major) + *major = ip->major; + if (minor) + *minor = ip->minor; + if (revision) + *revision = ip->revision; + return 0; + } + ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + } + } + + return -EINVAL; +} + +int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct gc_info_v1_0 *gc_info; + + if (!adev->discovery) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->discovery; + gc_info = (struct gc_info_v1_0 *)(adev->discovery + + le16_to_cpu(bhdr->table_list[GC].offset)); + + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); + 
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / + le32_to_cpu(gc_info->gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h new file mode 100644 index 000000000000..ba78e15d9b05 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -0,0 +1,36 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_DISCOVERY__ +#define __AMDGPU_DISCOVERY__ + +#define DISCOVERY_TMR_SIZE (64 << 10) + +int amdgpu_discovery_init(struct amdgpu_device *adev); +void amdgpu_discovery_fini(struct amdgpu_device *adev); +int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); +int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, + int *major, int *minor, int *revision); +int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); + +#endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b083b219b1a9..3cadb0b76f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -23,7 +23,7 @@ * Authors: Dave Airlie * Alex Deucher */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_i2c.h" @@ -32,11 +32,13 @@ #include "amdgpu_display.h" #include <asm/div64.h> +#include <linux/pci.h> #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_vblank.h> static void amdgpu_display_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) @@ -189,7 +191,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, } if (!adev->enable_virtual_display) { - r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); + r = amdgpu_bo_pin(new_abo, + amdgpu_display_supported_domains(adev, new_abo->flags)); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -202,7 +205,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl, + r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl, &work->shared_count, &work->shared); if (unlikely(r != 0)) { @@ -367,11 +370,13 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + struct drm_connector_list_iter iter; uint32_t devices; int i = 0; + drm_connector_list_iter_begin(dev, &iter); DRM_INFO("AMDGPU Display Connectors\n"); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); DRM_INFO("Connector %d:\n", i); DRM_INFO(" %s\n", connector->name); @@ -435,6 +440,7 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) } i++; } + drm_connector_list_iter_end(&iter); } /** @@ -493,13 +499,25 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .create_handle = drm_gem_fb_create_handle, }; -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags) { uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; #if defined(CONFIG_DRM_AMD_DC) - if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && - adev->flags & AMD_IS_APU && + /* + * if amdgpu_bo_support_uswc returns false it means that USWC mappings + * is not supported for this board. But this mapping is required + * to avoid hang caused by placement of scanout BO in GTT on certain + * APUs. So force the BO placement to VRAM in case this architecture + * will not allow USWC mappings. + * Also, don't allow GTT domain if the BO doens't have USWC falg set. 
+ */ + if (adev->asic_type >= CHIP_CARRIZO && + adev->asic_type < CHIP_RAVEN && + (adev->flags & AMD_IS_APU) && + (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + amdgpu_bo_support_uswc(bo_flags) && amdgpu_device_asic_has_dc_support(adev->asic_type)) domain |= AMDGPU_GEM_DOMAIN_GTT; #endif @@ -631,10 +649,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) amdgpu_dither_enum_list, sz); if (amdgpu_device_has_dc_support(adev)) { - adev->mode_info.max_bpc_property = - drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); - if (!adev->mode_info.max_bpc_property) - return -ENOMEM; adev->mode_info.abm_level_property = drm_property_create_range(adev->ddev, 0, "abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 06b922fe0d42..3620b24785e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -38,7 +38,8 @@ int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); void amdgpu_display_update_priority(struct amdgpu_device *adev); -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index a38e0fb4a6fe..e2eec7b66334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Advanced Micro Devices, Inc. + * Copyright 2019 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,32 +31,15 @@ * objects between different devices via PRIME <prime_buffer_sharing>`. */ -#include <drm/drmP.h> - #include "amdgpu.h" #include "amdgpu_display.h" #include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> #include <linux/dma-fence-array.h> /** - * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table - * implementation - * @obj: GEM buffer object (BO) - * - * Returns: - * A scatter/gather table for the pinned pages of the BO's memory. - */ -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int npages = bo->tbo.num_pages; - - return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages); -} - -/** * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation * @obj: GEM BO * @@ -103,7 +86,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) * Returns: * 0 on success or a negative error code on failure. */ -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -137,75 +121,24 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma return ret; } -/** - * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table - * implementation - * @dev: DRM device - * @attach: DMA-buf attachment - * @sg: Scatter/gather table - * - * Imports shared DMA buffer memory exported by another device. 
- * - * Returns: - * A new GEM BO of the given DRM device, representing the memory - * described by the given DMA-buf attachment and scatter/gather table. - */ -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg) -{ - struct reservation_object *resv = attach->dmabuf->resv; - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_bo *bo; - struct amdgpu_bo_param bp; - int ret; - - memset(&bp, 0, sizeof(bp)); - bp.size = attach->dmabuf->size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_CPU; - bp.flags = 0; - bp.type = ttm_bo_type_sg; - bp.resv = resv; - ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, &bp, &bo); - if (ret) - goto error; - - bo->tbo.sg = sg; - bo->tbo.ttm->sg = sg; - bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; - bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) - bo->prime_shared_count = 1; - - ww_mutex_unlock(&resv->lock); - return &bo->gem_base; - -error: - ww_mutex_unlock(&resv->lock); - return ERR_PTR(ret); -} - static int -__reservation_object_make_exclusive(struct reservation_object *obj) +__dma_resv_make_exclusive(struct dma_resv *obj) { struct dma_fence **fences; unsigned int count; int r; - if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + if (!dma_resv_get_list(obj)) /* no shared fences to convert */ return 0; - r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences); if (r) return r; if (count == 0) { /* Now that was unexpected. */ } else if (count == 1) { - reservation_object_add_excl_fence(obj, fences[0]); + dma_resv_add_excl_fence(obj, fences[0]); dma_fence_put(fences[0]); kfree(fences); } else { @@ -217,7 +150,7 @@ __reservation_object_make_exclusive(struct reservation_object *obj) if (!array) goto err_fences_put; - reservation_object_add_excl_fence(obj, &array->base); + dma_resv_add_excl_fence(obj, &array->base); dma_fence_put(&array->base); } @@ -231,110 +164,130 @@ err_fences_put: } /** - * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation - * @dma_buf: Shared DMA buffer - * @attach: DMA-buf attachment + * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * - * Makes sure that the shared DMA buffer can be accessed by the target device. - * For now, simply pins it to the GTT domain, where it should be accessible by - * all DMA devices. + * @dmabuf: DMA-buf where we attach to + * @attach: attachment to add * - * Returns: - * 0 on success or a negative error code on failure. + * Add the attachment as user to the exported DMA-buf. */ -static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, +static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = dma_buf->priv; + struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - long r; + int r; - r = drm_gem_map_attach(dma_buf, attach); - if (r) - return r; + if (attach->dev->driver == adev->dev->driver) + return 0; r = amdgpu_bo_reserve(bo, false); if (unlikely(r != 0)) - goto error_detach; - - - if (attach->dev->driver != adev->dev->driver) { - /* - * We only create shared fences for internal use, but importers - * of the dmabuf rely on exclusive fences for implicitly - * tracking write hazards. 
As any of the current fences may - * correspond to a write, we need to convert all existing - * fences on the reservation object into a single exclusive - * fence. - */ - r = __reservation_object_make_exclusive(bo->tbo.resv); - if (r) - goto error_unreserve; - } + return r; - /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + /* + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. + */ + r = __dma_resv_make_exclusive(bo->tbo.base.resv); if (r) - goto error_unreserve; - - if (attach->dev->driver != adev->dev->driver) - bo->prime_shared_count++; + return r; -error_unreserve: + bo->prime_shared_count++; amdgpu_bo_unreserve(bo); - -error_detach: - if (r) - drm_gem_map_detach(dma_buf, attach); - return r; + return 0; } /** - * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation - * @dma_buf: Shared DMA buffer - * @attach: DMA-buf attachment + * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation * - * This is called when a shared DMA buffer no longer needs to be accessible by - * another device. For now, simply unpins the buffer from GTT. + * @dmabuf: DMA-buf where we remove the attachment from + * @attach: the attachment to remove + * + * Called when an attachment is removed from the DMA-buf. */ -static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, +static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = dma_buf->priv; + struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - int ret = 0; - ret = amdgpu_bo_reserve(bo, true); - if (unlikely(ret != 0)) - goto error; - - amdgpu_bo_unpin(bo); if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) bo->prime_shared_count--; - amdgpu_bo_unreserve(bo); - -error: - drm_gem_map_detach(dma_buf, attach); } /** - * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation - * @obj: GEM BO + * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation + * @attach: DMA-buf attachment + * @dir: DMA direction + * + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. * * Returns: - * The BO's reservation object. + * sg_table filled with the DMA addresses to use or ERR_PRT with negative error + * code. 
+ */ +static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct sg_table *sgt; + long r; + + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return ERR_PTR(r); + + sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages); + if (IS_ERR(sgt)) + return sgt; + + if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir, + DMA_ATTR_SKIP_CPU_SYNC)) + goto error_free; + + return sgt; + +error_free: + sg_free_table(sgt); + kfree(sgt); + return ERR_PTR(-ENOMEM); +} + +/** + * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation + * @attach: DMA-buf attachment + * @sgt: sg_table to unmap + * @dir: DMA direction + * + * This is called when a shared DMA buffer no longer needs to be accessible by + * another device. For now, simply unpins the buffer from GTT. */ -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) +static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) { + struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - return bo->tbo.resv; + dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir); + sg_free_table(sgt); + kfree(sgt); + amdgpu_bo_unpin(bo); } /** - * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation * @dma_buf: Shared DMA buffer * @direction: Direction of DMA transfer * @@ -345,13 +298,13 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) * Returns: * 0 on success or a negative error code on failure. */ -static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, - enum dma_data_direction direction) +static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_supported_domains(adev); + u32 domain = amdgpu_display_supported_domains(adev, bo->flags); int ret; bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); @@ -374,12 +327,13 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, } const struct dma_buf_ops amdgpu_dmabuf_ops = { - .attach = amdgpu_gem_map_attach, - .detach = amdgpu_gem_map_detach, - .map_dma_buf = drm_gem_map_dma_buf, - .unmap_dma_buf = drm_gem_unmap_dma_buf, + .dynamic_mapping = true, + .attach = amdgpu_dma_buf_attach, + .detach = amdgpu_dma_buf_detach, + .map_dma_buf = amdgpu_dma_buf_map, + .unmap_dma_buf = amdgpu_dma_buf_unmap, .release = drm_gem_dmabuf_release, - .begin_cpu_access = amdgpu_gem_begin_cpu_access, + .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, @@ -387,18 +341,15 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { /** * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation - * @dev: DRM device * @gobj: GEM BO * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. * - * The main work is done by the &drm_gem_prime_export helper, which in turn - * uses &amdgpu_gem_prime_res_obj. + * The main work is done by the &drm_gem_prime_export helper. 
* * Returns: * Shared DMA buffer representing the GEM BO from the given device. */ -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, int flags) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); @@ -408,9 +359,9 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); - buf = drm_gem_prime_export(dev, gobj, flags); + buf = drm_gem_prime_export(gobj, flags); if (!IS_ERR(buf)) { - buf->file->f_mapping = dev->anon_inode->i_mapping; + buf->file->f_mapping = gobj->dev->anon_inode->i_mapping; buf->ops = &amdgpu_dmabuf_ops; } @@ -418,19 +369,65 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, } /** + * amdgpu_dma_buf_create_obj - create BO for DMA-buf import + * + * @dev: DRM device + * @dma_buf: DMA-buf + * + * Creates an empty SG BO for DMA-buf import. + * + * Returns: + * A new GEM BO of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. + */ +static struct drm_gem_object * +amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct dma_resv *resv = dma_buf->resv; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; + int ret; + + memset(&bp, 0, sizeof(bp)); + bp.size = dma_buf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; + dma_resv_lock(resv, NULL); + ret = amdgpu_bo_create(adev, &bp, &bo); + if (ret) + goto error; + + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + if (dma_buf->ops != &amdgpu_dmabuf_ops) + bo->prime_shared_count = 1; + + dma_resv_unlock(resv); + return &bo->tbo.base; + +error: + dma_resv_unlock(resv); + return ERR_PTR(ret); +} + +/** * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation * @dev: DRM device * @dma_buf: Shared DMA buffer * - * The main work is done by the &drm_gem_prime_import helper, which in turn - * uses &amdgpu_gem_prime_import_sg_table. + * Import a dma_buf into a the driver and potentially create a new GEM object. * * Returns: * GEM BO representing the shared DMA buffer for the given device. */ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) + struct dma_buf *dma_buf) { + struct dma_buf_attachment *attach; struct drm_gem_object *obj; if (dma_buf->ops == &amdgpu_dmabuf_ops) { @@ -445,5 +442,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, } } - return drm_gem_prime_import(dev, dma_buf); + obj = amdgpu_dma_buf_create_obj(dev, dma_buf); + if (IS_ERR(obj)) + return obj; + + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true); + if (IS_ERR(attach)) { + drm_gem_object_put(obj); + return ERR_CAST(attach); + } + + get_dma_buf(dma_buf); + obj->import_attach = attach; + return obj; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h new file mode 100644 index 000000000000..ec447a7b6b28 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -0,0 +1,39 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_DMA_BUF_H__ +#define __AMDGPU_DMA_BUF_H__ + +#include <drm/drm_gem.h> + +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, + int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma); + +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 68959b923f89..3fa18003d4d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -51,6 +51,7 @@ struct amdgpu_doorbell_index { uint32_t userqueue_start; uint32_t userqueue_end; uint32_t gfx_ring0; + uint32_t gfx_ring1; uint32_t sdma_engine[8]; uint32_t ih; union { @@ -129,13 +130,18 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_IH = 0x178, /* MMSCH: 392~407 * overlap the doorbell assignment with VCN as they are mutually exclusive - * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + * VCN engine's doorbell is 32 bit and two VCN ring share one QWORD */ - AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VNC0 */ AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189, AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A, AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B, + AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D, + AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E, + AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F, + AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188, AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189, AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A, @@ -153,6 +159,45 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; +typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT +{ + /* Compute + GFX: 0~255 */ + AMDGPU_NAVI10_DOORBELL_KIQ = 0x000, + AMDGPU_NAVI10_DOORBELL_HIQ = 0x001, + AMDGPU_NAVI10_DOORBELL_DIQ = 0x002, + AMDGPU_NAVI10_DOORBELL_MEC_RING0 = 0x003, + AMDGPU_NAVI10_DOORBELL_MEC_RING1 = 0x004, + AMDGPU_NAVI10_DOORBELL_MEC_RING2 = 0x005, + AMDGPU_NAVI10_DOORBELL_MEC_RING3 = 0x006, + 
AMDGPU_NAVI10_DOORBELL_MEC_RING4 = 0x007, + AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008, + AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009, + AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A, + AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B, + AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A, + AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B, + AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C, + /* SDMA:256~335*/ + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100, + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A, + /* IH: 376~391 */ + AMDGPU_NAVI10_DOORBELL_IH = 0x178, + /* MMSCH: 392~407 + * overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_NAVI10_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_NAVI10_DOORBELL64_VCN2_3 = 0x189, + AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A, + AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B, + + AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0, + AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7, + + AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F, + AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF +} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT; + /* * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 344967df3137..9cc270efee7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -22,7 +22,6 @@ * Authors: Alex Deucher */ -#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_i2c.h" @@ -904,3 +903,68 @@ amdgpu_get_vce_clock_state(void *handle, u32 idx) return NULL; } + +int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) +{ + uint32_t clk_freq; + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, + low ? &clk_freq : NULL, + !low ? &clk_freq : NULL, + true); + if (ret) + return 0; + return clk_freq * 100; + + } else { + return (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (low)); + } +} + +int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) +{ + uint32_t clk_freq; + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, + low ? &clk_freq : NULL, + !low ? 
&clk_freq : NULL, + true); + if (ret) + return 0; + return clk_freq * 100; + + } else { + return (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (low)); + } +} + +int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate) +{ + int ret = 0; + bool swsmu = is_support_sw_smu(adev); + + switch (block_type) { + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_VCE: + case AMD_IP_BLOCK_TYPE_SDMA: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + else + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; + case AMD_IP_BLOCK_TYPE_GMC: + case AMD_IP_BLOCK_TYPE_ACP: + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; + default: + break; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index e871e022c129..2cfb677272af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal { int min_temp; /* high temperature threshold */ int max_temp; + /* edge max emergency(shutdown) temp */ + int max_edge_emergency_temp; + /* hotspot low temperature threshold */ + int min_hotspot_temp; + /* hotspot high temperature critical threshold */ + int max_hotspot_crit_temp; + /* hotspot max emergency(shutdown) temp */ + int max_hotspot_emergency_temp; + /* memory low temperature threshold */ + int min_mem_temp; + /* memory high temperature critical threshold */ + int max_mem_crit_temp; + /* memory max emergency(shutdown) temp */ + int max_mem_emergency_temp; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ @@ -260,9 +274,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_enable_bapm(adev, e) \ ((adev)->powerplay.pp_funcs->enable_bapm((adev)->powerplay.pp_handle, (e))) -#define amdgpu_dpm_read_sensor(adev, idx, value, size) \ - ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size))) - #define amdgpu_dpm_set_fan_control_mode(adev, m) \ ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m))) @@ -281,12 +292,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_set_fan_speed_rpm(adev, s) \ ((adev)->powerplay.pp_funcs->set_fan_speed_rpm)((adev)->powerplay.pp_handle, (s)) -#define amdgpu_dpm_get_sclk(adev, l) \ - ((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l))) - -#define amdgpu_dpm_get_mclk(adev, l) \ - ((adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l))) - #define amdgpu_dpm_force_performance_level(adev, l) \ ((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l))) @@ -344,10 +349,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) -#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \ - ((adev)->powerplay.pp_funcs->set_powergating_by_smu(\ - (adev)->powerplay.pp_handle, block_type, gate)) - #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ (adev)->powerplay.pp_handle, buf)) @@ -448,6 +449,9 @@ struct amdgpu_pm { uint32_t smu_prv_buffer_size; struct amdgpu_bo *smu_prv_buffer; bool ac_power; + /* powerplay feature */ + uint32_t pp_feature; + }; #define R600_SSTU_DFLT 0 @@ -486,6 +490,8 
@@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); +int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor, + void *data, uint32_t *size); bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor); @@ -504,4 +510,11 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev, struct amd_vce_state* amdgpu_get_vce_clock_state(void *handle, u32 idx); +int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, + uint32_t block_type, bool gate); + +extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); + +extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7419ea8a388b..0ffc9447b573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -22,24 +22,29 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <drm/drmP.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_drv.h> #include <drm/drm_gem.h> +#include <drm/drm_vblank.h> #include "amdgpu_drv.h" #include <drm/drm_pciids.h> #include <linux/console.h> #include <linux/module.h> +#include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> +#include <linux/mmu_notifier.h> #include "amdgpu.h" #include "amdgpu_irq.h" -#include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_ras.h" + /* * KMS wrapper. * - 3.0.0 - initial driver @@ -74,9 +79,15 @@ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. + * - 3.31.0 - Add support for per-flip tiling attribute changes with DC + * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. + * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. 
+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask + * - 3.36.0 - Allow reading more status registers on si/cik */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 30 +#define KMS_DRIVER_MINOR 36 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -91,7 +102,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 10000; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -104,7 +115,6 @@ int amdgpu_vm_fragment_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; -int amdgpu_vram_page_split = 512; int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support = 0; int amdgpu_dc = -1; @@ -117,13 +127,9 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ -uint amdgpu_pp_feature_mask = 0xfffd3fff; -int amdgpu_ngg = 0; -int amdgpu_prim_buf_per_se = 0; -int amdgpu_pos_buf_per_se = 0; -int amdgpu_cntl_sb_buf_per_se = 0; -int amdgpu_param_buf_per_se = 0; +/* OverDrive(bit 14) disabled by default*/ +uint amdgpu_pp_feature_mask = 0xffffbfff; +uint amdgpu_force_long_training = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; @@ -132,10 +138,18 @@ int amdgpu_emu_mode = 0; uint amdgpu_smu_memory_pool_size = 0; /* FBC (bit 0) disabled by default*/ uint amdgpu_dc_feature_mask = 0; +int amdgpu_async_gfx_ring = 1; +int amdgpu_mcbp = 0; +int amdgpu_discovery = -1; +int amdgpu_mes = 0; +int amdgpu_noretry = 1; +int amdgpu_force_asic_type = -1; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), }; +int amdgpu_ras_enable = -1; +uint amdgpu_ras_mask = 0xffffffff; /** * DOC: vramlimit (int) @@ -223,16 +237,33 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); /** - * DOC: lockup_timeout (int) - * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. - * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. + * DOC: lockup_timeout (string) + * Set GPU scheduler timeout value in ms. + * + * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or + * multiple values specified. 0 and negative values are invalidated. They will be adjusted + * to the default timeout. + * + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. + * The second one is for Compute. The third and fourth ones are + * for SDMA and Video. + * + * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) + * jobs is 10000. And there is no timeout enforced on compute jobs. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); -module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; " + "for passthrough or sriov, 10000 for all jobs." + " 0: keep default value. 
negative: infinity timeout), " + "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " + "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); +module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** * DOC: dpm (int) - * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto). + * Override for dynamic power management setting + * (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20) + * The default is -1 (auto). */ MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(dpm, amdgpu_dpm, int, 0444); @@ -328,13 +359,6 @@ MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); /** - * DOC: vram_page_split (int) - * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512. - */ -MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); -module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); - -/** * DOC: exp_hw_support (int) * Enable experimental hw support (1 = enable). The default is 0 (disabled). */ @@ -371,6 +395,14 @@ MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); /** + * DOC: forcelongtraining (uint) + * Force long memory training in resume. + * The default is zero, indicates short training in resume. + */ +MODULE_PARM_DESC(forcelongtraining, "force memory long training"); +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444); + +/** * DOC: pcie_gen_cap (uint) * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. * The default is 0 (automatic for each asic). @@ -428,42 +460,6 @@ MODULE_PARM_DESC(virtual_display, module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); /** - * DOC: ngg (int) - * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled). - */ -MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); -module_param_named(ngg, amdgpu_ngg, int, 0444); - -/** - * DOC: prim_buf_per_se (int) - * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); -module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); - -/** - * DOC: pos_buf_per_se (int) - * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); -module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); - -/** - * DOC: cntl_sb_buf_per_se (int) - * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); -module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); - -/** - * DOC: param_buf_per_se (int) - * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. - * The default is 0 (depending on gfx). 
- */ -MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); -module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); - -/** * DOC: job_hang_limit (int) * Set how much time allow a job hang and not drop it. The default is 0. */ @@ -495,6 +491,21 @@ MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); /** + * DOC: ras_enable (int) + * Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default)) + */ +MODULE_PARM_DESC(ras_enable, "Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))"); +module_param_named(ras_enable, amdgpu_ras_enable, int, 0444); + +/** + * DOC: ras_mask (uint) + * Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1 + * See the flags in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h + */ +MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1"); +module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444); + +/** * DOC: si_support (int) * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled, * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available, @@ -542,6 +553,54 @@ MODULE_PARM_DESC(smu_memory_pool_size, "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); +/** + * DOC: async_gfx_ring (int) + * It is used to enable gfx rings that could be configured with different prioritites or equal priorities + */ +MODULE_PARM_DESC(async_gfx_ring, + "Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))"); +module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444); + +/** + * DOC: mcbp (int) + * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mcbp, + "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)"); +module_param_named(mcbp, amdgpu_mcbp, int, 0444); + +/** + * DOC: discovery (int) + * Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM. + * (-1 = auto (default), 0 = disabled, 1 = enabled) + */ +MODULE_PARM_DESC(discovery, + "Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM"); +module_param_named(discovery, amdgpu_discovery, int, 0444); + +/** + * DOC: mes (int) + * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes, + "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); +module_param_named(mes, amdgpu_mes, int, 0444); + +MODULE_PARM_DESC(noretry, + "Disable retry faults (0 = retry enabled, 1 = retry disabled (default))"); +module_param_named(noretry, amdgpu_noretry, int, 0644); + +/** + * DOC: force_asic_type (int) + * A non negative value used to specify the asic type for all supported GPUs. 
+ */ +MODULE_PARM_DESC(force_asic_type, + "A non negative value used to specify the asic type for all supported GPUs"); +module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); + + + #ifdef CONFIG_HSA_AMD /** * DOC: sched_policy (int) @@ -618,17 +677,6 @@ MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); /** - * DOC: noretry (int) - * This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry. - * Setting 1 disables retry. - * Retry is needed for recoverable page faults. - */ -int noretry; -module_param(noretry, int, 0644); -MODULE_PARM_DESC(noretry, - "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); - -/** * DOC: halt_if_hws_hang (int) * Halt if HWS hang is detected. Default value, 0, disables the halt on hang. * Setting 1 enables halt on hang. @@ -636,6 +684,24 @@ MODULE_PARM_DESC(noretry, int halt_if_hws_hang; module_param(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + +/** + * DOC: hws_gws_support(bool) + * Whether HWS support gws barriers. Default value: false (not supported) + * This will be replaced with a MEC firmware version check once firmware + * is ready + */ +bool hws_gws_support; +module_param(hws_gws_support, bool, 0444); +MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); + +/** + * DOC: queue_preemption_timeout_ms (int) + * queue preemption timeout in ms (1 = Minimum, 9000 = default) + */ +int queue_preemption_timeout_ms = 9000; +module_param(queue_preemption_timeout_ms, int, 0644); +MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)"); #endif /** @@ -646,6 +712,22 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))"); module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); +/** + * DOC: abmlevel (uint) + * Override the default ABM (Adaptive Backlight Management) level used for DC + * enabled hardware. Requires DMCU to be supported and loaded. + * Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by + * default. Values 1-4 control the maximum allowable brightness reduction via + * the ABM algorithm, with 1 being the least reduction and 4 being the most + * reduction. + * + * Defaults to 0, or disabled. Userspace can still override this level later + * after boot. 
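A hedged usage sketch for the module parameters documented in this hunk (the concrete values below are illustrative only and the parameter parsing itself lives outside this excerpt): on the kernel command line,

    amdgpu.lockup_timeout=10000,60000,10000,10000

sets separate GFX, compute, SDMA and video job timeouts in milliseconds, while

    amdgpu.lockup_timeout=10000

applies a single timeout to all non-compute jobs on bare metal, and

    amdgpu.abmlevel=2

selects a moderate ABM backlight-reduction level (valid range 0-4, DMCU support required).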
+ */ +uint amdgpu_dm_abm_level = 0; +MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); +module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -902,6 +984,31 @@ static const struct pci_device_id pciidlist[] = { /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, + /* Arcturus */ + {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + /* Navi10 */ + {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + /* Navi14 */ + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + + /* Renoir */ + {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + + /* Navi12 */ + {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -928,8 +1035,43 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + /* Get rid of things like offb */ - ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb"); + ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb"); if (ret) return ret; @@ -972,8 +1114,12 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - DRM_ERROR("Device removal is currently not supported outside of fbcon\n"); +#ifdef MODULE + if (THIS_MODULE->state != MODULE_STATE_GOING) +#endif + DRM_ERROR("Hotplug removal is not supported\n"); drm_dev_unplug(dev); + drm_dev_put(dev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } @@ -984,26 +1130,29 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct 
amdgpu_device *adev = dev->dev_private; + if (amdgpu_ras_intr_triggered()) + return; + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain * hypervisors so just do this all the time. */ + adev->mp1_state = PP_MP1_STATE_UNLOAD; amdgpu_device_ip_suspend(adev); + adev->mp1_state = PP_MP1_STATE_NONE; } static int amdgpu_pmops_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, true, true); } static int amdgpu_pmops_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); /* GPU comes up enabled by the bios on resume */ if (amdgpu_device_is_px(drm_dev)) { @@ -1017,33 +1166,34 @@ static int amdgpu_pmops_resume(struct device *dev) static int amdgpu_pmops_freeze(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_dev->dev_private; + int r; - struct drm_device *drm_dev = pci_get_drvdata(pdev); - return amdgpu_device_suspend(drm_dev, false, true); + r = amdgpu_device_suspend(drm_dev, false, true); + if (r) + return r; + return amdgpu_asic_reset(adev); } static int amdgpu_pmops_thaw(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_resume(drm_dev, false, true); } static int amdgpu_pmops_poweroff(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, true, true); } static int amdgpu_pmops_restore(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_resume(drm_dev, false, true); } @@ -1102,8 +1252,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) static int amdgpu_pmops_runtime_idle(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); struct drm_crtc *crtc; if (!amdgpu_device_is_px(drm_dev)) { @@ -1158,13 +1307,14 @@ static int amdgpu_flush(struct file *f, fl_owner_t id) { struct drm_file *file_priv = f->private_data; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; - amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr); + timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout); + timeout = amdgpu_vm_wait_idle(&fpriv->vm, timeout); - return 0; + return timeout >= 0 ? 
0 : timeout; } - static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, @@ -1209,7 +1359,7 @@ static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, @@ -1233,9 +1383,6 @@ static struct drm_driver kms_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = amdgpu_gem_prime_export, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_res_obj = amdgpu_gem_prime_res_obj, - .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, - .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, .gem_prime_vmap = amdgpu_gem_prime_vmap, .gem_prime_vunmap = amdgpu_gem_prime_vunmap, .gem_prime_mmap = amdgpu_gem_prime_mmap, @@ -1300,6 +1447,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + mmu_notifier_synchronize(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index ec78e2b2015c..61fcf247a638 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -23,7 +23,7 @@ * Authors: Dave Airlie * Alex Deucher */ -#include <drm/drmP.h> + #include <drm/drm_crtc_helper.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -37,12 +37,14 @@ amdgpu_link_encoder_connector(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + drm_connector_list_iter_begin(dev, &iter); /* walk the list and link encoders to connectors */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -55,6 +57,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev) } } } + drm_connector_list_iter_end(&iter); } void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) @@ -62,8 +65,10 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector; + struct drm_connector_list_iter iter; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices; @@ -72,6 +77,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) amdgpu_connector->devices, encoder->encoder_type); } } + drm_connector_list_iter_end(&iter); } struct drm_connector * @@ -79,15 +85,20 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector; + struct drm_connector 
*connector, *found = NULL; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->active_device & amdgpu_connector->devices) - return connector; + if (amdgpu_encoder->active_device & amdgpu_connector->devices) { + found = connector; + break; + } } - return NULL; + drm_connector_list_iter_end(&iter); + return found; } struct drm_connector * @@ -95,15 +106,20 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector; + struct drm_connector *connector, *found = NULL; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->devices & amdgpu_connector->devices) - return connector; + if (amdgpu_encoder->devices & amdgpu_connector->devices) { + found = connector; + break; + } } - return NULL; + drm_connector_list_iter_end(&iter); + return found; } struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 5cbde74b97dd..143753d237e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -23,22 +23,22 @@ * Authors: * David Airlie */ + #include <linux/module.h> -#include <linux/slab.h> #include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/vga_switcheroo.h> -#include <drm/drmP.h> +#include <drm/amdgpu_drm.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> -#include <drm/amdgpu_drm.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_fourcc.h> + #include "amdgpu.h" #include "cikd.h" #include "amdgpu_gem.h" -#include <drm/drm_fb_helper.h> - -#include <linux/vga_switcheroo.h> - #include "amdgpu_display.h" /* object hierarchy - @@ -49,12 +49,11 @@ static int amdgpufb_open(struct fb_info *info, int user) { - struct amdgpu_fbdev *rfbdev = info->par; - struct amdgpu_device *adev = rfbdev->adev; - int ret = pm_runtime_get_sync(adev->ddev->dev); + struct drm_fb_helper *fb_helper = info->par; + int ret = pm_runtime_get_sync(fb_helper->dev->dev); if (ret < 0 && ret != -EACCES) { - pm_runtime_mark_last_busy(adev->ddev->dev); - pm_runtime_put_autosuspend(adev->ddev->dev); + pm_runtime_mark_last_busy(fb_helper->dev->dev); + pm_runtime_put_autosuspend(fb_helper->dev->dev); return ret; } return 0; @@ -63,11 +62,10 @@ amdgpufb_open(struct fb_info *info, int user) static int amdgpufb_release(struct fb_info *info, int user) { - struct amdgpu_fbdev *rfbdev = info->par; - struct amdgpu_device *adev = rfbdev->adev; + struct drm_fb_helper *fb_helper = info->par; - pm_runtime_mark_last_busy(adev->ddev->dev); - pm_runtime_put_autosuspend(adev->ddev->dev); + pm_runtime_mark_last_busy(fb_helper->dev->dev); + pm_runtime_put_autosuspend(fb_helper->dev->dev); return 0; } @@ -123,6 +121,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **gobj_p) { + const struct drm_format_info 
*info; struct amdgpu_device *adev = rfbdev->adev; struct drm_gem_object *gobj = NULL; struct amdgpu_bo *abo = NULL; @@ -132,21 +131,22 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, int aligned_size, size; int height = mode_cmd->height; u32 cpp; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; - cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0); + info = drm_get_format_info(adev->ddev, mode_cmd); + cpp = info->cpp[0]; /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_supported_domains(adev); - + domain = amdgpu_display_supported_domains(adev, flags); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, ttm_bo_type_kernel, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); @@ -168,7 +168,6 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, dev_err(adev->dev, "FB failed to set tiling flags\n"); } - ret = amdgpu_bo_pin(abo, domain); if (ret) { amdgpu_bo_unreserve(abo); @@ -233,9 +232,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, goto out; } - info->par = rfbdev; - info->skip_vt_switch = true; - ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { @@ -248,10 +244,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - strcpy(info->fix.id, "amdgpudrmfb"); - - drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); - info->fbops = &amdgpufb_ops; tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start; @@ -260,7 +252,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); - drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height); + drm_fb_helper_fill_info(info, &rfbdev->helper, sizes); /* setup aperture base/size for vesafb takeover */ info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ee47c11e92ce..377fe20bce23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -34,7 +34,9 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/firmware.h> -#include <drm/drmP.h> + +#include <drm/drm_debugfs.h> + #include "amdgpu.h" #include "amdgpu_trace.h" @@ -136,8 +138,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; - struct dma_fence *old, **ptr; + struct dma_fence __rcu **ptr; uint32_t seq; + int r; fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); if (fence == NULL) @@ -153,15 +156,24 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + if (unlikely(rcu_dereference_protected(*ptr, 1))) { + struct dma_fence *old; + + rcu_read_lock(); + old = dma_fence_get_rcu_safe(ptr); + 
rcu_read_unlock(); + + if (old) { + r = dma_fence_wait(old, false); + dma_fence_put(old); + if (r) + return r; + } + } + /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. */ - old = rcu_dereference_protected(*ptr, 1); - if (old && !dma_fence_is_signaled(old)) { - DRM_INFO("rcu slot is busy\n"); - dma_fence_wait(old, false); - } - rcu_assign_pointer(*ptr, dma_fence_get(&fence->base)); *f = &fence->base; @@ -417,9 +429,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission) { + struct amdgpu_device *adev = ring->adev; long timeout; int r; + if (!adev) + return -EINVAL; + /* Check that num_hw_submission is a power of two */ if ((num_hw_submission & (num_hw_submission - 1)) != 0) return -EINVAL; @@ -441,12 +457,20 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, /* No need to setup the GPU scheduler for KIQ ring */ if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { - /* for non-sriov case, no timeout enforce on compute ring */ - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - && !amdgpu_sriov_vf(ring->adev)) - timeout = MAX_SCHEDULE_TIMEOUT; - else - timeout = msecs_to_jiffies(amdgpu_lockup_timeout); + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + timeout = adev->compute_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, @@ -674,22 +698,30 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) amdgpu_fence_process(ring); seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); - seq_printf(m, "Last signaled fence 0x%08x\n", + seq_printf(m, "Last signaled fence 0x%08x\n", atomic_read(&ring->fence_drv.last_seq)); - seq_printf(m, "Last emitted 0x%08x\n", + seq_printf(m, "Last emitted 0x%08x\n", ring->fence_drv.sync_seq); + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX || + ring->funcs->type == AMDGPU_RING_TYPE_SDMA) { + seq_printf(m, "Last signaled trailing fence 0x%08x\n", + le32_to_cpu(*ring->trail_fence_cpu_addr)); + seq_printf(m, "Last emitted 0x%08x\n", + ring->trail_seq); + } + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) continue; /* set in CP_VMID_PREEMPT and preemption occurred */ - seq_printf(m, "Last preempted 0x%08x\n", + seq_printf(m, "Last preempted 0x%08x\n", le32_to_cpu(*(ring->fence_drv.cpu_addr + 2))); /* set in CP_VMID_RESET and reset occurred */ - seq_printf(m, "Last reset 0x%08x\n", + seq_printf(m, "Last reset 0x%08x\n", le32_to_cpu(*(ring->fence_drv.cpu_addr + 4))); /* Both preemption and reset occurred */ - seq_printf(m, "Last both 0x%08x\n", + seq_printf(m, "Last both 0x%08x\n", le32_to_cpu(*(ring->fence_drv.cpu_addr + 6))); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 6d11e1721147..19705e399905 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -25,7 +25,10 @@ * Alex Deucher * Jerome Glisse */ -#include <drm/drmP.h> + +#include <linux/pci.h> +#include <linux/vmalloc.h> + #include <drm/amdgpu_drm.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> @@ -68,7 +71,7 @@ */ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) { - struct page *dummy_page = 
adev->mman.bdev.glob->dummy_read_page; + struct page *dummy_page = ttm_bo_glob.dummy_read_page; if (adev->dummy_page_addr) return 0; @@ -248,7 +251,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + return 0; } @@ -307,9 +312,9 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, uint64_t flags) { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS - unsigned i,t,p; + unsigned t,p; #endif - int r; + int r, i; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); @@ -333,7 +338,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index f89f5734d985..f6ac1e9548f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -27,26 +27,11 @@ struct amdgpu_ring; struct amdgpu_bo; -struct amdgpu_gds_asic_info { - uint32_t total_size; - uint32_t gfx_partition_size; - uint32_t cs_partition_size; -}; - struct amdgpu_gds { - struct amdgpu_gds_asic_info mem; - struct amdgpu_gds_asic_info gws; - struct amdgpu_gds_asic_info oa; - uint32_t gds_compute_max_wave_id; - - /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. - * Such resource is shared between all gfx clients. - * TODO: move this operation to user space - * */ - struct amdgpu_bo* gds_gfx_bo; - struct amdgpu_bo* gws_gfx_bo; - struct amdgpu_bo* oa_gfx_bo; + uint32_t gds_size; + uint32_t gws_size; + uint32_t oa_size; + uint32_t gds_compute_max_wave_id; }; struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d21dd2f369da..4277125a79ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -26,11 +26,16 @@ * Jerome Glisse */ #include <linux/ktime.h> +#include <linux/module.h> #include <linux/pagemap.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include <drm/amdgpu_drm.h> +#include <drm/drm_debugfs.h> + #include "amdgpu.h" #include "amdgpu_display.h" +#include "amdgpu_xgmi.h" void amdgpu_gem_object_free(struct drm_gem_object *gobj) { @@ -45,7 +50,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, + struct dma_resv *resv, struct drm_gem_object **obj) { struct amdgpu_bo *bo; @@ -80,7 +85,7 @@ retry: } return r; } - *obj = &bo->gem_base; + *obj = &bo->tbo.base; return 0; } @@ -129,7 +134,7 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, return -EPERM; if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && - abo->tbo.resv != vm->root.base.bo->tbo.resv) + abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) return -EPERM; r = amdgpu_bo_reserve(abo, false); @@ -210,7 +215,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, union drm_amdgpu_gem_create *args = data; uint64_t flags = args->in.domain_flags; uint64_t size = args->in.bo_size; - struct reservation_object *resv = NULL; + struct 
dma_resv *resv = NULL; struct drm_gem_object *gobj; uint32_t handle; int r; @@ -247,7 +252,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (r) return r; - resv = vm->root.base.bo->tbo.resv; + resv = vm->root.base.bo->tbo.base.resv; } r = amdgpu_gem_object_create(adev, size, args->in.alignment, @@ -286,6 +291,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, uint32_t handle; int r; + args->addr = untagged_addr(args->addr); + if (offset_in_page(args->addr | args->size)) return -EINVAL; @@ -322,33 +329,30 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, } if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - bo->tbo.ttm->pages); + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (r) goto release_object; r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, &handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); @@ -431,7 +435,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, + ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true, timeout); /* ret == 0 means not signaled, @@ -523,13 +527,41 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } - r = amdgpu_vm_update_directories(adev, vm); + r = amdgpu_vm_update_pdes(adev, vm, false); error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } +/** + * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags + * + * @adev: amdgpu_device pointer + * @flags: GEM UAPI flags + * + * Returns the GEM UAPI flags mapped into hardware for the ASIC. 
+ */ +uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + if (adev->gmc.gmc_funcs->map_mtype) + pte_flag |= amdgpu_gmc_map_mtype(adev, + flags & AMDGPU_VM_MTYPE_MASK); + + return pte_flag; +} + int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -627,12 +659,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -647,12 +674,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -678,6 +700,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_gem_op *args = data; struct drm_gem_object *gobj; + struct amdgpu_vm_bo_base *base; struct amdgpu_bo *robj; int r; @@ -696,7 +719,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_gem_create_in info; void __user *out = u64_to_user_ptr(args->value); - info.bo_size = robj->gem_base.size; + info.bo_size = robj->tbo.base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; info.domains = robj->preferred_domains; info.domain_flags = robj->flags; @@ -716,6 +739,15 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(robj); break; } + for (base = robj->vm_bo; base; base = base->next) + if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev), + amdgpu_ttm_adev(base->vm->root.base.bo->tbo.bdev))) { + r = -EINVAL; + amdgpu_bo_unreserve(robj); + goto out; + } + + robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU); @@ -745,17 +777,26 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; u32 domain; int r; + /* + * The buffer returned from this function should be cleared, but + * it can only be done if the ring is enabled or we'll fail to + * create the buffer. 
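[Editor's aside, not part of the patch: the amdgpu_gem_va_map_flags() helper added in this amdgpu_gem.c hunk replaces amdgpu_gmc_get_pte_flags() on the AMDGPU_VA_OP_MAP/REPLACE paths. A minimal caller sketch follows, assuming an already reserved bo_va and validated arguments; example_map_rw() and its parameters are hypothetical, while the called functions and flag names are taken from the hunk itself.]

static int example_map_rw(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
                          uint64_t va, uint64_t offset, uint64_t size)
{
        /* translate GEM UAPI mapping flags into PTE bits for this ASIC */
        uint64_t va_flags = amdgpu_gem_va_map_flags(adev,
                                                    AMDGPU_VM_PAGE_READABLE |
                                                    AMDGPU_VM_PAGE_WRITEABLE);

        /* va_flags now holds AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE, plus
         * ASIC MTYPE bits had an AMDGPU_VM_MTYPE_* value been requested */
        return amdgpu_vm_bo_map(adev, bo_va, va, offset, size, va_flags);
}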
+ */ + if (adev->mman.buffer_funcs_enabled) + flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + args->pitch = amdgpu_align_pitch(adev, args->width, DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_pin_domain(adev, - amdgpu_display_supported_domains(adev)); - r = amdgpu_gem_object_create(adev, args->size, 0, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, + amdgpu_display_supported_domains(adev, flags)); + r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); if (r) return -ENOMEM; @@ -809,8 +850,8 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) if (pin_count) seq_printf(m, " pin count %d", pin_count); - dma_buf = READ_ONCE(bo->gem_base.dma_buf); - attachment = READ_ONCE(bo->gem_base.import_attach); + dma_buf = READ_ONCE(bo->tbo.base.dma_buf); + attachment = READ_ONCE(bo->tbo.base.import_attach); if (attachment) seq_printf(m, " imported from %p", dma_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f1ddfc50bcc7..e0f025dd1b14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -31,7 +31,7 @@ */ #define AMDGPU_GEM_DOMAIN_MAX 0x3 -#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) +#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) void amdgpu_gem_object_free(struct drm_gem_object *obj); int amdgpu_gem_object_open(struct drm_gem_object *obj, @@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, - int flags); -struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - -extern const struct dma_buf_ops amdgpu_dmabuf_ops; /* * GEM objects. 
@@ -63,7 +47,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev); int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, + struct dma_resv *resv, struct drm_gem_object **obj); int amdgpu_mode_dumb_create(struct drm_file *file_priv, @@ -83,6 +67,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags); int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 97a60da62004..e00b46180d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -22,10 +22,11 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" +#include "amdgpu_ras.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -34,8 +35,8 @@ * GPU GFX IP block helpers function. */ -int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, - int pipe, int queue) +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue) { int bit = 0; @@ -47,8 +48,8 @@ int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, return bit; } -void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, - int *mec, int *pipe, int *queue) +void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) { *queue = bit % adev->gfx.mec.num_queue_per_pipe; *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) @@ -61,10 +62,40 @@ void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue) { - return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), + return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), adev->gfx.mec.queue_bitmap); } +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) +{ + int bit = 0; + + bit += me * adev->gfx.me.num_pipe_per_me + * adev->gfx.me.num_queue_per_pipe; + bit += pipe * adev->gfx.me.num_queue_per_pipe; + bit += queue; + + return bit; +} + +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, + int *me, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.me.num_queue_per_pipe; + *pipe = (bit / adev->gfx.me.num_queue_per_pipe) + % adev->gfx.me.num_pipe_per_me; + *me = (bit / adev->gfx.me.num_queue_per_pipe) + / adev->gfx.me.num_pipe_per_me; +} + +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, + int me, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), + adev->gfx.me.queue_bitmap); +} + /** * amdgpu_gfx_scratch_get - Allocate a scratch register * @@ -199,6 +230,28 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; } +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, me; + + for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { + queue = i % adev->gfx.me.num_queue_per_pipe; + me = (i / adev->gfx.me.num_queue_per_pipe) + / 
adev->gfx.me.num_pipe_per_me; + + if (me >= adev->gfx.me.num_me) + break; + /* policy: amdgpu owns the first queue per pipe at this stage + * will extend to mulitple queues per pipe later */ + if (me == 0 && queue < 1) + set_bit(i, adev->gfx.me.queue_bitmap); + } + + /* update the number of active graphics rings */ + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); +} + static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -213,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; - amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); /* * 1. Using pipes 2/3 from MEC 2 seems cause problems. @@ -266,8 +319,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, return r; } -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); @@ -306,9 +358,9 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, return 0; } -/* create MQD for each compute queue */ -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size) +/* create MQD for each compute/gfx queue */ +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size) { struct amdgpu_ring *ring = NULL; int r, i; @@ -335,6 +387,27 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { + /* create MQD for each KGQ */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.me.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + } + /* create MQD for each KCQ */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -343,7 +416,7 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); return r; } @@ -357,11 +430,21 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, return 0; } -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; int i; + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + kfree(adev->gfx.me.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; kfree(adev->gfx.mec.mqd_backup[i]); @@ -377,6 +460,73 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) &ring->mqd_ptr); 
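[Editor's aside, not part of the patch: the graphics-queue helpers added above mirror the existing MEC ones. A quick round-trip sketch; example_gfx_queue_round_trip() is hypothetical, the three helpers and their signatures come from this hunk.]

static void example_gfx_queue_round_trip(struct amdgpu_device *adev)
{
        int bit, me, pipe, queue;

        bit = amdgpu_gfx_me_queue_to_bit(adev, 0, 0, 0); /* ME 0, pipe 0, queue 0 */
        amdgpu_gfx_bit_to_me_queue(adev, bit, &me, &pipe, &queue);

        /* me == 0, pipe == 0, queue == 0 again; the queue belongs to amdgpu
         * when amdgpu_gfx_is_me_queue_enabled(adev, me, pipe, queue) is true,
         * i.e. after amdgpu_gfx_graphics_queue_acquire() has claimed it */
}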
} +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_compute_rings)) + return -ENOMEM; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], + RESET_QUEUES, 0, 0); + + return amdgpu_ring_test_ring(kiq_ring); +} + +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint64_t queue_mask = 0; + int r, i; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) + return -EINVAL; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, + kiq_ring->queue); + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_compute_rings + + kiq->pmf->set_resources_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); + + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ enable failed\n"); + + return r; +} + /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable * * @adev: amdgpu_device pointer @@ -390,10 +540,12 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) { - if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK)) + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; - if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu) + if (!is_support_sw_smu(adev) && + (!adev->powerplay.pp_funcs || + !adev->powerplay.pp_funcs->set_powergating_by_smu)) return; @@ -413,3 +565,102 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } + +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "gfx_err_count", + .debugfs_name = "gfx_err_inject", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + if (!adev->gfx.ras_if) { + adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gfx.ras_if) + return -ENOMEM; + adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if->sub_block_index = 0; + strcpy(adev->gfx.ras_if->name, "gfx"); + } + fs_info.head = ih_info.head = *adev->gfx.ras_if; + + r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) + goto late_fini; + } else { + /* free gfx ras_if if ras is not supported */ + r = 0; + goto free; + } + + return 0; +late_fini: + 
amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); +free: + kfree(adev->gfx.ras_if); + adev->gfx.ras_if = NULL; + return r; +} + +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && + adev->gfx.ras_if) { + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + /* TODO ue will trigger an interrupt. + * + * When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); + amdgpu_ras_reset_gpu(adev, 0); + } + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + DRM_ERROR("CP ECC ERROR IRQ\n"); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f790e15bcd08..0ae0a2715b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -38,6 +38,7 @@ #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L +#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES struct amdgpu_mec { @@ -54,12 +55,41 @@ struct amdgpu_mec { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; +enum amdgpu_unmap_queues_action { + PREEMPT_QUEUES = 0, + RESET_QUEUES, + DISABLE_PROCESS_QUEUES, + PREEMPT_QUEUES_NO_UNMAP, +}; + +struct kiq_pm4_funcs { + /* Support ASIC-specific kiq pm4 packets*/ + void (*kiq_set_resources)(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask); + void (*kiq_map_queues)(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring); + void (*kiq_unmap_queues)(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq); + void (*kiq_query_status)(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq); + /* Packet sizes */ + int set_resources_size; + int map_queues_size; + int unmap_queues_size; + int query_status_size; +}; + struct amdgpu_kiq { u64 eop_gpu_addr; struct amdgpu_bo *eop_obj; spinlock_t ring_lock; struct amdgpu_ring ring; struct amdgpu_irq_src irq; + const struct kiq_pm4_funcs *pmf; }; /* @@ -131,6 +161,11 @@ struct amdgpu_gfx_config { uint32_t double_offchip_lds_buf; /* cached value of DB_DEBUG2 */ uint32_t db_debug2; + /* gfx10 specific config */ + uint32_t num_sc_per_sh; + uint32_t num_packer_per_sc; + uint32_t pa_sc_tile_steering_override; + uint64_t tcc_disabled_mask; }; struct amdgpu_cu_info { @@ -161,40 +196,48 @@ struct amdgpu_gfx_funcs { uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, - u32 queue); + u32 queue, u32 vmid); + int 
(*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); }; -struct amdgpu_ngg_buf { - struct amdgpu_bo *bo; - uint64_t gpu_addr; - uint32_t size; - uint32_t bo_size; +struct sq_work { + struct work_struct work; + unsigned ih_data; }; -enum { - NGG_PRIM = 0, - NGG_POS, - NGG_CNTL, - NGG_PARAM, - NGG_BUF_MAX +struct amdgpu_pfp { + struct amdgpu_bo *pfp_fw_obj; + uint64_t pfp_fw_gpu_addr; + uint32_t *pfp_fw_ptr; }; -struct amdgpu_ngg { - struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; - uint32_t gds_reserve_addr; - uint32_t gds_reserve_size; - bool init; +struct amdgpu_ce { + struct amdgpu_bo *ce_fw_obj; + uint64_t ce_fw_gpu_addr; + uint32_t *ce_fw_ptr; }; -struct sq_work { - struct work_struct work; - unsigned ih_data; +struct amdgpu_me { + struct amdgpu_bo *me_fw_obj; + uint64_t me_fw_gpu_addr; + uint32_t *me_fw_ptr; + uint32_t num_me; + uint32_t num_pipe_per_me; + uint32_t num_queue_per_pipe; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS]; + + /* These are the resources for which amdgpu takes ownership */ + DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); }; struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; struct amdgpu_rlc rlc; + struct amdgpu_pfp pfp; + struct amdgpu_ce ce; + struct amdgpu_me me; struct amdgpu_mec mec; struct amdgpu_kiq kiq; struct amdgpu_scratch scratch; @@ -224,6 +267,7 @@ struct amdgpu_gfx { uint32_t mec2_feature_version; bool mec_fw_write_wait; bool me_fw_write_wait; + bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; @@ -246,9 +290,6 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; - /* NGG */ - struct amdgpu_ngg ngg; - /* gfx off */ bool gfx_off_state; /* true: enabled, false: disabled */ struct mutex gfx_off_mutex; @@ -258,11 +299,14 @@ struct amdgpu_gfx { /* pipe reservation */ struct mutex pipe_reserve_mutex; DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /*ras */ + struct ras_common_if *ras_if; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) -#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q)) +#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -287,24 +331,40 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_irq_src *irq); -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq); +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, unsigned hpd_size); -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size); -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size); +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev); +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev); +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev); void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); -int 
amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, - int pipe, int queue); -void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, - int *mec, int *pipe, int *queue); +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev); + +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue); +void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue); bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, + int pipe, int queue); +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, + int *me, int *pipe, int *queue); +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, + int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); - +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d73367cab4f3..a12f33c0f5df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -24,7 +24,11 @@ * */ +#include <linux/io-64-nonatomic-lo-hi.h> + #include "amdgpu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /** * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO @@ -80,6 +84,33 @@ uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) } /** + * amdgpu_gmc_set_pte_pde - update the page tables using CPU + * + * @adev: amdgpu_device pointer + * @cpu_pt_addr: cpu address of the page table + * @gpu_page_idx: entry in the page table to update + * @addr: dst addr to write into pte/pde + * @flags: access flags + * + * Update the page tables using CPU. + */ +int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) +{ + void __iomem *ptr = (void *)cpu_pt_addr; + uint64_t value; + + /* + * The following is for PTE only. GART does not have PDEs. + */ + value = addr & 0x0000FFFFFFFFF000ULL; + value |= flags; + writeq(value, ptr + (gpu_page_idx * 8)); + return 0; +} + +/** * amdgpu_gmc_agp_addr - return the address in the AGP address space * * @tbo: TTM BO which needs the address, must be in GTT domain @@ -191,6 +222,14 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); u64 size_af, size_bf; + if (amdgpu_sriov_vf(adev)) { + mc->agp_start = 0xffffffff; + mc->agp_end = 0x0; + mc->agp_size = 0; + + return; + } + if (mc->fb_start > mc->gart_start) { size_bf = (mc->fb_start & sixteen_gb_mask) - ALIGN(mc->gart_end + 1, sixteen_gb); @@ -213,3 +252,84 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n", mc->agp_size >> 20, mc->agp_start, mc->agp_end); } + +/** + * amdgpu_gmc_filter_faults - filter VM faults + * + * @adev: amdgpu device structure + * @addr: address of the VM fault + * @pasid: PASID of the process causing the fault + * @timestamp: timestamp of the fault + * + * Returns: + * True if the fault was filtered and should not be processed further. 
+ * False if the fault is a new one and needs to be handled. + */ +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid, uint64_t timestamp) +{ + struct amdgpu_gmc *gmc = &adev->gmc; + + uint64_t stamp, key = addr << 4 | pasid; + struct amdgpu_gmc_fault *fault; + uint32_t hash; + + /* If we don't have space left in the ring buffer return immediately */ + stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) - + AMDGPU_GMC_FAULT_TIMEOUT; + if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp) + return true; + + /* Try to find the fault in the hash */ + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); + fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; + while (fault->timestamp >= stamp) { + uint64_t tmp; + + if (fault->key == key) + return true; + + tmp = fault->timestamp; + fault = &gmc->fault_ring[fault->next]; + + /* Check if the entry was reused */ + if (fault->timestamp >= tmp) + break; + } + + /* Add the fault to the ring */ + fault = &gmc->fault_ring[gmc->last_fault]; + fault->key = key; + fault->timestamp = timestamp; + + /* And update the hash */ + fault->next = gmc->fault_hash[hash].idx; + gmc->fault_hash[hash].idx = gmc->last_fault++; + return false; +} + +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { + r = adev->umc.funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) { + r = adev->mmhub.funcs->ras_late_init(adev); + if (r) + return r; + } + + return amdgpu_xgmi_ras_late_init(adev); +} + +void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_umc_ras_fini(adev); + amdgpu_mmhub_ras_fini(adev); + amdgpu_xgmi_ras_fini(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 81e6070d255b..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -43,14 +43,41 @@ */ #define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL +/* + * Ring size as power of two for the log of recent faults. 
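[Editor's aside, not part of the patch: a sketch of how a VM-fault interrupt handler is expected to consume the amdgpu_gmc_filter_faults() helper implemented above. example_handle_fault() and its addr/pasid/timestamp parameters (normally decoded from the IV entry) are hypothetical; only the filter call itself comes from the patch.]

static int example_handle_fault(struct amdgpu_device *adev, uint64_t addr,
                                uint16_t pasid, uint64_t timestamp)
{
        /* drop retry faults the ring/hash filter has already seen */
        if (amdgpu_gmc_filter_faults(adev, addr, pasid, timestamp))
                return 1;

        /* ...a new fault: look up the VM for this pasid and service it... */
        return 1;
}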
+ */ +#define AMDGPU_GMC_FAULT_RING_ORDER 8 +#define AMDGPU_GMC_FAULT_RING_SIZE (1 << AMDGPU_GMC_FAULT_RING_ORDER) + +/* + * Hash size as power of two for the log of recent faults + */ +#define AMDGPU_GMC_FAULT_HASH_ORDER 8 +#define AMDGPU_GMC_FAULT_HASH_SIZE (1 << AMDGPU_GMC_FAULT_HASH_ORDER) + +/* + * Number of IH timestamp ticks until a fault is considered handled + */ +#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL + struct firmware; /* + * GMC page fault information + */ +struct amdgpu_gmc_fault { + uint64_t timestamp; + uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; + uint64_t key:52; +}; + +/* * VMHUB structures, functions & helpers */ struct amdgpu_vmhub { uint32_t ctx0_ptb_addr_lo32; uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_sem; uint32_t vm_inv_eng0_req; uint32_t vm_inv_eng0_ack; uint32_t vm_context0_cntl; @@ -63,28 +90,25 @@ struct amdgpu_vmhub { */ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ - void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type); + void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); /* Change the VMID -> PASID mapping */ void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid, unsigned pasid); - /* write pte/pde updates using the cpu */ - int (*set_pte_pde)(struct amdgpu_device *adev, - void *cpu_pt_addr, /* cpu addr of page table */ - uint32_t gpu_page_idx, /* pte/pde to update */ - uint64_t addr, /* addr to write into pte/pde */ - uint64_t flags); /* access flags */ /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* set pte flags based per asic */ - uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, - uint32_t flags); + /* map mtype to hardware flags */ + uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ void (*get_vm_pde)(struct amdgpu_device *adev, int level, u64 *dst, u64 *flags); + /* get the pte flags to use for a BO VA mapping */ + void (*get_vm_pte)(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags); }; struct amdgpu_xgmi { @@ -100,21 +124,52 @@ struct amdgpu_xgmi { /* gpu list in the same hive */ struct list_head head; bool supported; + struct ras_common_if *ras_if; }; struct amdgpu_gmc { + /* FB's physical address in MMIO space (for CPU to + * map FB). This is different compared to the agp/ + * gart/vram_start/end field as the later is from + * GPU's view and aper_base is from CPU's view. + */ resource_size_t aper_size; resource_size_t aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; + /* AGP aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place AGP by setting MC_VM_AGP_BOT/TOP registers + * Under VMID0, logical address == MC address. AGP + * aperture maps to physical bus or IOVA addressed. + * AGP aperture is used to simulate FB in ZFB case. + * AGP aperture is also used for page table in system + * memory (mainly for APU). 
+ * + */ u64 agp_size; u64 agp_start; u64 agp_end; + /* GART aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR + * registers + * Under VMID0, logical address inside GART aperture will + * be translated through gpuvm gart page table to access + * paged system memory + */ u64 gart_size; u64 gart_start; u64 gart_end; + /* Frame buffer aperture of this GPU device. Different from + * fb_start (see below), this only covers the local GPU device. + * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios) + * and calculate vram_start of this local device by adding an + * offset inside the XGMI hive. + * Under VMID0, logical address == MC address + */ u64 vram_start; u64 vram_end; /* FB region , it's same as local vram region in single GPU, in XGMI @@ -133,6 +188,7 @@ struct amdgpu_gmc { uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; + uint8_t vram_vendor; uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; @@ -147,17 +203,24 @@ struct amdgpu_gmc { struct kfd_vm_fault_info *vm_fault_info; atomic_t vm_fault_info_updated; + struct amdgpu_gmc_fault fault_ring[AMDGPU_GMC_FAULT_RING_SIZE]; + struct { + uint64_t idx:AMDGPU_GMC_FAULT_RING_ORDER; + } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE]; + uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; + const struct amdgpu_gmc_funcs *gmc_funcs; struct amdgpu_xgmi xgmi; + struct amdgpu_irq_src ecc_irq; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) +#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) +#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR @@ -189,6 +252,9 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, uint64_t *addr, uint64_t *flags); +int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags); uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo); uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo); void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, @@ -197,5 +263,9 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid, uint64_t timestamp); +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); 
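[Editor's aside, not part of the patch: amdgpu_gmc_flush_gpu_tlb() now takes a vmhub index, so callers that used to issue a single flush loop over every hub, exactly as the amdgpu_gart_bind()/amdgpu_gart_unbind() hunks above do. example_flush_all_hubs() is a hypothetical wrapper around that loop.]

static void example_flush_all_hubs(struct amdgpu_device *adev)
{
        int i;

        /* flush VMID 0 on every VM hub after updating GART entries */
        for (i = 0; i < adev->num_vmhubs; i++)
                amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}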
#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index da7b1b92d9cf..627104401e84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -22,7 +22,6 @@ * Authors: Christian König */ -#include <drm/drmP.h> #include "amdgpu.h" struct amdgpu_gtt_mgr { @@ -37,6 +36,47 @@ struct amdgpu_gtt_node { }; /** + * DOC: mem_info_gtt_total + * + * The amdgpu driver provides a sysfs API for reporting current total size of + * the GTT. + * The file mem_info_gtt_total is used for this, and returns the total size of + * the GTT block, in bytes + */ +static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (adev->mman.bdev.man[TTM_PL_TT].size) * PAGE_SIZE); +} + +/** + * DOC: mem_info_gtt_used + * + * The amdgpu driver provides a sysfs API for reporting current total amount of + * used GTT. + * The file mem_info_gtt_used is used for this, and returns the current used + * size of the GTT block, in bytes + */ +static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT])); +} + +static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, + amdgpu_mem_info_gtt_total_show, NULL); +static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO, + amdgpu_mem_info_gtt_used_show, NULL); + +/** * amdgpu_gtt_mgr_init - init GTT manager and DRM MM * * @man: TTM memory type manager @@ -50,6 +90,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr; uint64_t start, size; + int ret; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) @@ -61,6 +102,18 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, spin_lock_init(&mgr->lock); atomic64_set(&mgr->available, p_size); man->priv = mgr; + + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); + return ret; + } + return 0; } @@ -74,12 +127,17 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, */ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr = man->priv; spin_lock(&mgr->lock); drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); kfree(mgr); man->priv = NULL; + + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index f2739995c335..70dbe343f51d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -23,9 +23,10 @@ * Authors: Dave Airlie * Alex Deucher */ + #include <linux/export.h> +#include <linux/pci.h> -#include <drm/drmP.h> #include <drm/drm_edid.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 0b8ef2d27d6b..60655834d649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -28,13 +28,16 @@ */ #include <linux/seq_file.h> #include <linux/slab.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> +#include <drm/drm_debugfs.h> + #include "amdgpu.h" #include "atom.h" #include "amdgpu_trace.h" #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) +#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000) /* * IB @@ -140,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* ring tests don't use a job */ if (job) { vm = job->vm; - fence_ctx = job->base.s_fence->scheduled.context; + fence_ctx = job->base.s_fence ? + job->base.s_fence->scheduled.context : 0; } else { vm = NULL; fence_ctx = 0; @@ -208,6 +212,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, skip_preamble = ring->current_ctx == fence_ctx; if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; + status |= job->preemption_status; amdgpu_ring_emit_cntxcntl(ring, status); } @@ -216,9 +221,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - skip_preamble && - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ + skip_preamble && + !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && + !amdgpu_mcbp && + !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; amdgpu_ring_emit_ib(ring, job, ib, status); @@ -344,6 +350,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) * cost waiting for it coming back under RUNTIME only */ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; + } else if (adev->gmc.xgmi.hive_id) { + tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT; } for (i = 0; i < adev->num_rings; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index df9b173c3d0b..6f9289735e31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -24,7 +24,7 @@ #include <linux/idr.h> #include <linux/dma-fence-array.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_trace.h" @@ -104,7 +104,7 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence, * * Free the pasid only after all the fences in resv are signaled. */ -void amdgpu_pasid_free_delayed(struct reservation_object *resv, +void amdgpu_pasid_free_delayed(struct dma_resv *resv, unsigned int pasid) { struct dma_fence *fence, **fences; @@ -112,7 +112,7 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv, unsigned count; int r; - r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences); + r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences); if (r) goto fallback; @@ -156,7 +156,7 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. 
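[Editor's aside, not part of the patch: reservation_object is renamed to dma_resv throughout this series, and a BO's reservation is now reached via tbo.base.resv. A minimal sketch of the renamed wait call, matching its use in amdgpu_gem_wait_idle_ioctl() above and the fallback just below; example_wait_bo_idle() is hypothetical.]

static long example_wait_bo_idle(struct amdgpu_bo *bo)
{
        /* block (uninterruptibly, without timeout) until every fence on the
         * BO's reservation object has signaled */
        return dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
                                         MAX_SCHEDULE_TIMEOUT);
}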
*/ - reservation_object_wait_timeout_rcu(resv, true, false, + dma_resv_wait_timeout_rcu(resv, true, false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } @@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_later(updates, (*id)->flushed_updates)) updates = NULL; - if ((*id)->owner != vm->entity.fence_context || + if ((*id)->owner != vm->direct.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if ((*id)->owner != vm->entity.fence_context) + if ((*id)->owner != vm->direct.fence_context) continue; if ((*id)->pd_gpu_addr != job->vm_pd_addr) @@ -364,8 +364,12 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (updates && (!flushed || dma_fence_is_later(updates, flushed))) needs_flush = true; - /* Concurrent flushes are only possible starting with Vega10 */ - if (adev->asic_type < CHIP_VEGA10 && needs_flush) + /* Concurrent flushes are only possible starting with Vega10 and + * are broken on Navi10 and Navi14. + */ + if (needs_flush && (adev->asic_type < CHIP_VEGA10 || + adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14)) continue; /* Good, we can use this VMID. Remember this submission as @@ -445,7 +449,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->entity.fence_context; + id->owner = vm->direct.fence_context; if (job->vm_needs_flush) { dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 7625419f0fc2..8e58325bbca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -72,7 +72,7 @@ struct amdgpu_vmid_mgr { int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(unsigned int pasid); -void amdgpu_pasid_free_delayed(struct reservation_object *resv, +void amdgpu_pasid_free_delayed(struct dma_resv *resv, unsigned int pasid); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 1c50be3ab8a9..6d8f05511aba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -21,7 +21,8 @@ * */ -#include <drm/drmP.h> +#include <linux/dma-mapping.h> + #include "amdgpu.h" #include "amdgpu_ih.h" @@ -142,6 +143,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) */ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { + unsigned int count = AMDGPU_IH_MAX_NUM_IVS; u32 wptr; if (!ih->enabled || adev->shutdown) @@ -159,7 +161,7 @@ restart_ih: /* Order reading of wptr vs. 
reading of IH ring data */ rmb(); - while (ih->rptr != wptr) { + while (ih->rptr != wptr && --count) { amdgpu_irq_dispatch(adev, ih); ih->rptr &= ih->ptr_mask; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 113a1ba13d4a..4e0bb645176d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -24,6 +24,9 @@ #ifndef __AMDGPU_IH_H__ #define __AMDGPU_IH_H__ +/* Maximum number of IVs processed at once */ +#define AMDGPU_IH_MAX_NUM_IVS 32 + struct amdgpu_device; struct amdgpu_iv_entry; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c index 26482914dc4b..5cf142e849bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c @@ -29,8 +29,9 @@ */ #include <linux/compat.h> -#include <drm/drmP.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_ioctl.h> + #include "amdgpu_drv.h" long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index af4c3b1af322..30d540d23b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -43,8 +43,11 @@ */ #include <linux/irq.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include <drm/drm_crtc_helper.h> +#include <drm/drm_irq.h> +#include <drm/drm_vblank.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_ih.h" @@ -84,10 +87,13 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) struct drm_device *dev = adev->ddev; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + struct drm_connector_list_iter iter; mutex_lock(&mode_config->mutex); - list_for_each_entry(connector, &mode_config->connector_list, head) + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) amdgpu_connector_hotplug(connector); + drm_connector_list_iter_end(&iter); mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); @@ -150,6 +156,20 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) ret = amdgpu_ih_process(adev, &adev->irq.ih); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); + + /* For the hardware that cannot enable bif ring for both ras_controller_irq + * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status + * register to check whether the interrupt is triggered or not, and properly + * ack the interrupt if it is there + */ + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_controller_intr_no_bifring) + adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); + + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); + return ret; } @@ -225,10 +245,19 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.msi_enabled = false; if (amdgpu_msi_ok(adev)) { - int ret = pci_enable_msi(adev->pdev); - if (!ret) { + int nvec = pci_msix_vec_count(adev->pdev); + unsigned int flags; + + if (nvec <= 0) { + flags = PCI_IRQ_MSI; + } else { + flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; + } + /* we only need one vector */ + nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); + if (nvec > 0) { adev->irq.msi_enabled = true; - dev_dbg(adev->dev, "amdgpu: using MSI.\n"); + dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n"); } } @@ 
-251,7 +280,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); adev->irq.installed = true; - r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); + /* Use vector 0 for MSI-X */ + r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0)); if (r) { adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) @@ -281,7 +311,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) drm_irq_uninstall(adev->ddev); adev->irq.installed = false; if (adev->irq.msi_enabled) - pci_disable_msi(adev->pdev); + pci_free_irq_vectors(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); } @@ -366,7 +396,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, * amdgpu_irq_dispatch - dispatch IRQ to IP blocks * * @adev: amdgpu device pointer - * @entry: interrupt vector pointer + * @ih: interrupt ring instance * * Dispatches IRQ to IP blocks. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0a17fb1af204..4fb20e870e63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -24,7 +24,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/sched.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_trace.h" @@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_device_should_recover_gpu(ring->adev)) amdgpu_device_gpu_recover(ring->adev, job); + else + drm_sched_suspend_timeout(&ring->sched); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -216,7 +218,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; - int r; + int r = 0; job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; @@ -241,9 +243,49 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) job->fence = dma_fence_get(fence); amdgpu_job_free_resources(job); + + fence = r ? 
ERR_PTR(r) : fence; return fence; } +#define to_drm_sched_job(sched_job) \ + container_of((sched_job), struct drm_sched_job, queue_node) + +void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job; + struct drm_sched_entity *s_entity = NULL; + int i; + + /* Signal all jobs not yet scheduled */ + for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = &sched->sched_rq[i]; + + if (!rq) + continue; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_signal(&s_fence->scheduled); + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); + } + } + spin_unlock(&rq->lock); + } + + /* Signal all jobs already scheduled to HW */ + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); + } +} + const struct drm_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index e1b46a6703de..dc7ee9358dcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -29,6 +29,8 @@ #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means context switch occured */ #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) +/* bit set means IB is preempted */ +#define AMDGPU_IB_PREEMPTED (1 << 3) #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) @@ -45,6 +47,7 @@ struct amdgpu_job { struct amdgpu_ib *ibs; struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; + uint32_t preemption_status; uint32_t num_ibs; void *owner; bool vm_needs_flush; @@ -73,4 +76,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f); int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **fence); + +void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e860412043bb..b6db28a570c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -25,8 +25,9 @@ * Alex Deucher * Jerome Glisse */ -#include <drm/drmP.h> + #include "amdgpu.h" +#include <drm/drm_debugfs.h> #include <drm/amdgpu_drm.h> #include "amdgpu_sched.h" #include "amdgpu_uvd.h" @@ -35,12 +36,15 @@ #include <linux/vga_switcheroo.h> #include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/pci.h> #include <linux/pm_runtime.h> #include "amdgpu_amdkfd.h" #include "amdgpu_gem.h" #include "amdgpu_display.h" +#include "amdgpu_ras.h" -static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) +void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; int i; @@ -101,7 +105,7 @@ done_free: dev->dev_private = NULL; } -static void amdgpu_register_gpu_instance(struct amdgpu_device *adev) +void amdgpu_register_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -140,41 +144,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long 
flags) struct amdgpu_device *adev; int r, acpi_status; -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif - adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; @@ -221,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -296,6 +264,17 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->pm.fw_version; fw_info->feature = 0; break; + case AMDGPU_INFO_FW_TA: + if (query_fw->index > 1) + return -EINVAL; + if (query_fw->index == 0) { + fw_info->ver = adev->psp.ta_fw_version; + fw_info->feature = adev->psp.ta_xgmi_ucode_version; + } else { + fw_info->ver = adev->psp.ta_fw_version; + fw_info->feature = adev->psp.ta_ras_ucode_version; + } + break; case AMDGPU_INFO_FW_SDMA: if (query_fw->index >= adev->sdma.num_instances) return -EINVAL; @@ -393,23 +372,38 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_dec.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_dec.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_enc_rings; i++) - if (adev->vcn.ring_enc[i].sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + for (j = 0; j < adev->vcn.num_enc_rings; j++) + if (adev->vcn.inst[i].ring_enc[j].sched.ready) + ++num_rings; + } ib_start_alignment = 64; ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_jpeg.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_jpeg.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; @@ -578,13 +572,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_gds gds_info; memset(&gds_info, 0, sizeof(gds_info)); - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; - gds_info.gds_total_size = adev->gds.mem.total_size; - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size; - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size; - gds_info.oa_per_compute_partition = 
adev->gds.oa.cs_partition_size; + gds_info.compute_partition_size = adev->gds.gds_size; + gds_info.gds_total_size = adev->gds.gds_size; + gds_info.gws_per_compute_partition = adev->gds.gws_size; + gds_info.oa_per_compute_partition = adev->gds.oa_size; return copy_to_user(out, &gds_info, min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; } @@ -592,9 +583,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_vram_gtt vram_gtt; vram_gtt.vram_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size); - vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size); + atomic64_read(&adev->vram_pin_size) - + AMDGPU_VM_RESERVED_VRAM; + vram_gtt.vram_cpu_accessible_size = + min(adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size), + vram_gtt.vram_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); @@ -607,15 +601,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file memset(&mem, 0, sizeof(mem)); mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size); + atomic64_read(&adev->vram_pin_size) - + AMDGPU_VM_RESERVED_VRAM; mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = adev->gmc.visible_vram_size; - mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size); + mem.cpu_accessible_vram.usable_heap_size = + min(adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size), + mem.vram.usable_heap_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = @@ -650,20 +647,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; + if (info->read_mmr_reg.count > 128) + return -EINVAL; + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); if (!regs) return -ENOMEM; alloc_size = info->read_mmr_reg.count * sizeof(*regs); - for (i = 0; i < info->read_mmr_reg.count; i++) + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, &regs[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); kfree(regs); + amdgpu_gfx_off_ctrl(adev, true); return -EFAULT; } + } + amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); return n ?
-EFAULT : 0; @@ -684,6 +688,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (adev->pm.dpm_enabled) { dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; + } else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && + adev->virt.ops->get_pp_clk) { + dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10; + dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10; } else { dev_info.max_engine_clock = adev->clock.default_sclk * 10; dev_info.max_memory_clock = adev->clock.default_mclk * 10; @@ -696,7 +704,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; @@ -730,17 +738,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vce_harvest_config = adev->vce.harvest_config; dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; - - if (amdgpu_ngg) { - dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; - dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; - dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; - dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; - dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; - dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; - dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; - dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; - } dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; @@ -749,6 +746,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; + if (adev->family >= AMDGPU_FAMILY_NV) + dev_info.pa_sc_tile_steering_override = + adev->gfx.config.pa_sc_tile_steering_override; + + dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } @@ -909,6 +912,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_VRAM_LOST_COUNTER: ui32 = atomic_read(&adev->vram_lost_counter); return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; + case AMDGPU_INFO_RAS_ENABLED_FEATURES: { + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + uint64_t ras_mask; + + if (!ras) + return -EINVAL; + ras_mask = (uint64_t)ras->supported << 32 | ras->features; + + return copy_to_user(out, &ras_mask, + min_t(u64, size, sizeof(ras_mask))) ? 
+ -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -949,7 +964,13 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) int r, pasid; /* Ensure IB tests are run on ring */ - flush_delayed_work(&adev->late_init_work); + flush_delayed_work(&adev->delayed_init_work); + + + if (amdgpu_ras_intr_triggered()) { + DRM_ERROR("RAS Intr triggered, device disabled!!"); + return -EHWPOISON; + } file_priv->driver_priv = NULL; @@ -978,7 +999,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, @@ -1041,7 +1062,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_vm_bo_rmv(adev, fpriv->prt_va); - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); amdgpu_vm_bo_rmv(adev, fpriv->csa_va); @@ -1056,7 +1077,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_vm_fini(adev, &fpriv->vm); if (pasid) - amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); + amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); amdgpu_bo_unref(&pd); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) @@ -1328,6 +1349,16 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + query_fw.fw_type = AMDGPU_INFO_FW_TA; + for (i = 0; i < 2; i++) { + query_fw.index = i; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + continue; + seq_printf(m, "TA %s feature version: %u, firmware version: 0x%08x\n", + i ? "RAS" : "XGMI", fw_info.feature, fw_info.ver); + } + /* SMC */ query_fw.fw_type = AMDGPU_INFO_FW_SMC; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h new file mode 100644 index 000000000000..78fe49033543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -0,0 +1,101 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
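/*
 * A minimal userspace sketch (not part of this patch) of how the new
 * AMDGPU_INFO_RAS_ENABLED_FEATURES query above might be issued through
 * DRM_IOCTL_AMDGPU_INFO.  The fd handling and the include path are
 * assumptions; the mask layout (supported blocks in the high 32 bits,
 * enabled features in the low 32 bits) follows the kernel code above.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>	/* uapi header; exact path varies with libdrm */

static int query_ras_mask(int fd, uint64_t *mask)
{
	struct drm_amdgpu_info request;

	memset(&request, 0, sizeof(request));
	request.return_pointer = (uint64_t)(uintptr_t)mask;
	request.return_size = sizeof(*mask);
	request.query = AMDGPU_INFO_RAS_ENABLED_FEATURES;

	return ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &request);
}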
+ * + */ + +#ifndef __AMDGPU_MES_H__ +#define __AMDGPU_MES_H__ + +struct amdgpu_mes_funcs; + +struct amdgpu_mes { + struct amdgpu_adev *adev; + + const struct firmware *fw; + + /* mes ucode */ + struct amdgpu_bo *ucode_fw_obj; + uint64_t ucode_fw_gpu_addr; + uint32_t *ucode_fw_ptr; + uint32_t ucode_fw_version; + uint64_t uc_start_addr; + + /* mes ucode data */ + struct amdgpu_bo *data_fw_obj; + uint64_t data_fw_gpu_addr; + uint32_t *data_fw_ptr; + uint32_t data_fw_version; + uint64_t data_start_addr; + + /* ip specific functions */ + struct amdgpu_mes_funcs *funcs; +}; + +struct mes_add_queue_input { + uint32_t process_id; + uint64_t page_table_base_addr; + uint64_t process_va_start; + uint64_t process_va_end; + uint64_t process_quantum; + uint64_t process_context_addr; + uint64_t gang_quantum; + uint64_t gang_context_addr; + uint32_t inprocess_gang_priority; + uint32_t gang_global_priority_level; + uint32_t doorbell_offset; + uint64_t mqd_addr; + uint64_t wptr_addr; + uint32_t queue_type; + uint32_t paging; +}; + +struct mes_remove_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; +}; + +struct mes_suspend_gang_input { + bool suspend_all_gangs; + uint64_t gang_context_addr; + uint64_t suspend_fence_addr; + uint32_t suspend_fence_value; +}; + +struct mes_resume_gang_input { + bool resume_all_gangs; + uint64_t gang_context_addr; +}; + +struct amdgpu_mes_funcs { + int (*add_hw_queue)(struct amdgpu_mes *mes, + struct mes_add_queue_input *input); + + int (*remove_hw_queue)(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input); + + int (*suspend_gang)(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input); + + int (*resume_gang)(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input); +}; + +#endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c new file mode 100644 index 000000000000..676c48c02d77 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -0,0 +1,70 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
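/*
 * A hedged sketch (not from this patch) of how a caller might drive the
 * amdgpu_mes_funcs interface declared in amdgpu_mes.h above.  The wrapper
 * name and the parameter choice are assumptions for illustration only;
 * the actual MES front end is added by later patches.
 */
#include "amdgpu.h"
#include "amdgpu_mes.h"

static int example_mes_add_queue(struct amdgpu_mes *mes,
				 uint32_t doorbell_offset,
				 uint64_t mqd_addr, uint64_t wptr_addr,
				 uint32_t queue_type)
{
	struct mes_add_queue_input input = {0};

	if (!mes->funcs || !mes->funcs->add_hw_queue)
		return -EOPNOTSUPP;

	input.doorbell_offset = doorbell_offset;
	input.mqd_addr = mqd_addr;
	input.wptr_addr = wptr_addr;
	input.queue_type = queue_type;

	/* hand the request to the ASIC-specific MES backend */
	return mes->funcs->add_hw_queue(mes, &input);
}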
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "mmhub_err_count", + .debugfs_name = "mmhub_err_inject", + }; + + if (!adev->mmhub.ras_if) { + adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->mmhub.ras_if) + return -ENOMEM; + adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; + adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->mmhub.ras_if->sub_block_index = 0; + strcpy(adev->mmhub.ras_if->name, "mmhub"); + } + ih_info.head = fs_info.head = *adev->mmhub.ras_if; + r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) { + kfree(adev->mmhub.ras_if); + adev->mmhub.ras_if = NULL; + } + + return r; +} + +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && + adev->mmhub.ras_if) { + struct ras_common_if *ras_if = adev->mmhub.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h new file mode 100644 index 000000000000..1cd78940cf82 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __AMDGPU_MMHUB_H__ +#define __AMDGPU_MMHUB_H__ + +struct amdgpu_mmhub_funcs { + void (*ras_init)(struct amdgpu_device *adev); + int (*ras_late_init)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); +}; + +struct amdgpu_mmhub { + struct ras_common_if *ras_if; + const struct amdgpu_mmhub_funcs *funcs; +}; + +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev); +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 3e6823fdd939..828b5167ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,446 +45,100 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <linux/mmu_notifier.h> -#include <linux/interval_tree.h> -#include <drm/drmP.h> #include <drm/drm.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" /** - * struct amdgpu_mn + * amdgpu_mn_invalidate_gfx - callback to notify about mm change * - * @adev: amdgpu device pointer - * @mm: process address space - * @mn: MMU notifier structure - * @type: type of MMU notifier - * @work: destruction work item - * @node: hash table node to find structure by adev and mn - * @lock: rw semaphore protecting the notifier nodes - * @objects: interval tree containing amdgpu_mn_nodes - * @read_lock: mutex for recursive locking of @lock - * @recursion: depth of recursion - * - * Data for each amdgpu device and process address space. - */ -struct amdgpu_mn { - /* constant after initialisation */ - struct amdgpu_device *adev; - struct mm_struct *mm; - struct mmu_notifier mn; - enum amdgpu_mn_type type; - - /* only used on destruction */ - struct work_struct work; - - /* protected by adev->mn_lock */ - struct hlist_node node; - - /* objects protected by lock */ - struct rw_semaphore lock; - struct rb_root_cached objects; - struct mutex read_lock; - atomic_t recursion; -}; - -/** - * struct amdgpu_mn_node - * - * @it: interval node defining start-last of the affected address range - * @bos: list of all BOs in the affected address range - * - * Manages all BOs which are affected of a certain range of address space. - */ -struct amdgpu_mn_node { - struct interval_tree_node it; - struct list_head bos; -}; - -/** - * amdgpu_mn_destroy - destroy the MMU notifier - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void amdgpu_mn_destroy(struct work_struct *work) -{ - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = amn->adev; - struct amdgpu_mn_node *node, *next_node; - struct amdgpu_bo *bo, *next_bo; - - mutex_lock(&adev->mn_lock); - down_write(&amn->lock); - hash_del(&amn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &amn->objects.rb_root, it.rb) { - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&amn->mn, amn->mm); - kfree(amn); -} - -/** - * amdgpu_mn_release - callback to notify about mm destruction - * - * @mn: our notifier - * @mm: the mm this callback is about - * - * Shedule a work item to lazy destroy our notifier. 
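/*
 * A small sketch (an assumption, not part of this patch) of how the new
 * struct amdgpu_mmhub_funcs callbacks from amdgpu_mmhub.h above might be
 * consumed by a RAS error-count query path.  The function name is
 * hypothetical; the real hookup lives in the ASIC-specific gmc/mmhub code.
 */
#include "amdgpu.h"

static void example_query_mmhub_errors(struct amdgpu_device *adev,
				       void *ras_error_status)
{
	if (adev->mmhub.funcs && adev->mmhub.funcs->query_ras_error_count)
		adev->mmhub.funcs->query_ras_error_count(adev,
							 ras_error_status);
}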
- */ -static void amdgpu_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - - INIT_WORK(&amn->work, amdgpu_mn_destroy); - schedule_work(&amn->work); -} - - -/** - * amdgpu_mn_lock - take the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_lock(struct amdgpu_mn *mn) -{ - if (mn) - down_write(&mn->lock); -} - -/** - * amdgpu_mn_unlock - drop the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_unlock(struct amdgpu_mn *mn) -{ - if (mn) - up_write(&mn->lock); -} - -/** - * amdgpu_mn_read_lock - take the read side lock for this notifier - * - * @amn: our notifier - */ -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) -{ - if (blockable) - mutex_lock(&amn->read_lock); - else if (!mutex_trylock(&amn->read_lock)) - return -EAGAIN; - - if (atomic_inc_return(&amn->recursion) == 1) - down_read_non_owner(&amn->lock); - mutex_unlock(&amn->read_lock); - - return 0; -} - -/** - * amdgpu_mn_read_unlock - drop the read side lock for this notifier - * - * @amn: our notifier - */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) -{ - if (atomic_dec_return(&amn->recursion) == 0) - up_read_non_owner(&amn->lock); -} - -/** - * amdgpu_mn_invalidate_node - unmap all BOs of a node - * - * @node: the node with the BOs to unmap - * @start: start of address range affected - * @end: end of address range affected + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, - unsigned long start, - unsigned long end) +static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; - list_for_each_entry(bo, &node->bos, mn_list) { + if (!mmu_notifier_range_blockable(range)) + return false; - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) - continue; + mutex_lock(&adev->notifier_lock); - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - DRM_ERROR("(%ld) failed to wait for user bo\n", r); + mmu_interval_set_seq(mni, cur_seq); - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); - } + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&adev->notifier_lock); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); + return true; } -/** - * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change - * - * @mn: our notifier - * @range: mmu notifier context - * - * Block for operations on BOs to finish and mark pages as accessed and - * potentially dirty. 
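/*
 * A condensed sketch (with assumptions marked) of the mmu_interval_notifier
 * pattern this rework moves to: a per-object notifier is inserted over the
 * userptr range, and its invalidate callback bumps the sequence number
 * before blocking until the device has stopped using the pages.  The
 * my_object type and my_* names are illustrative only.
 */
#include <linux/mmu_notifier.h>

struct my_object {
	struct mmu_interval_notifier notifier;
};

static bool my_invalidate(struct mmu_interval_notifier *mni,
			  const struct mmu_notifier_range *range,
			  unsigned long cur_seq)
{
	if (!mmu_notifier_range_blockable(range))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	/* ...wait for pending GPU access to the range to quiesce... */
	return true;
}

static const struct mmu_interval_notifier_ops my_ops = {
	.invalidate = my_invalidate,
};

/* register the notifier over [addr, addr + length) of current->mm */
static int my_register(struct my_object *obj, unsigned long addr,
		       unsigned long length)
{
	return mmu_interval_notifier_insert(&obj->notifier, current->mm,
					    addr, length, &my_ops);
}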
- */ -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - struct interval_tree_node *it; - unsigned long end; - - /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; - - /* TODO we should be able to split locking for interval tree and - * amdgpu_mn_invalidate_node - */ - if (amdgpu_mn_read_lock(amn, range->blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, range->start, end); - while (it) { - struct amdgpu_mn_node *node; - - if (!range->blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); - - amdgpu_mn_invalidate_node(node, range->start, end); - } - - return 0; -} +static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { + .invalidate = amdgpu_mn_invalidate_gfx, +}; /** - * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * amdgpu_mn_invalidate_hsa - callback to notify about mm change * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() * - * We temporarily evict all BOs between start and end. This - * necessitates evicting all user-mode queues of the process. The BOs - * are restorted in amdgpu_mn_invalidate_range_end_hsa. + * We temporarily evict the BO attached to this range. This necessitates + * evicting all user-mode queues of the process. */ -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - struct interval_tree_node *it; - unsigned long end; - - /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; - - if (amdgpu_mn_read_lock(amn, range->blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, range->start, end); - while (it) { - struct amdgpu_mn_node *node; - struct amdgpu_bo *bo; - - if (!range->blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - list_for_each_entry(bo, &node->bos, mn_list) { - struct kgd_mem *mem = bo->kfd_bo; + if (!mmu_notifier_range_blockable(range)) + return false; - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - range->start, - end)) - amdgpu_amdkfd_evict_userptr(mem, range->mm); - } - } + mutex_lock(&adev->notifier_lock); - return 0; -} + mmu_interval_set_seq(mni, cur_seq); -/** - * amdgpu_mn_invalidate_range_end - callback to notify about mm change - * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range - * - * Release the lock again to allow new command submissions. 
- */ -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); + mutex_unlock(&adev->notifier_lock); - amdgpu_mn_read_unlock(amn); + return true; } -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { - [AMDGPU_MN_TYPE_GFX] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, - }, - [AMDGPU_MN_TYPE_HSA] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, - }, +static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { + .invalidate = amdgpu_mn_invalidate_hsa, }; -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - -/** - * amdgpu_mn_get - create notifier context - * - * @adev: amdgpu device pointer - * @type: type of MMU notifier context - * - * Creates a notifier context for current->mm. - */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - struct mm_struct *mm = current->mm; - struct amdgpu_mn *amn; - unsigned long key = AMDGPU_MN_KEY(mm, type); - int r; - - mutex_lock(&adev->mn_lock); - if (down_write_killable(&mm->mmap_sem)) { - mutex_unlock(&adev->mn_lock); - return ERR_PTR(-EINTR); - } - - hash_for_each_possible(adev->mn_hash, amn, node, key) - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) - goto release_locks; - - amn = kzalloc(sizeof(*amn), GFP_KERNEL); - if (!amn) { - amn = ERR_PTR(-ENOMEM); - goto release_locks; - } - - amn->adev = adev; - amn->mm = mm; - init_rwsem(&amn->lock); - amn->type = type; - amn->mn.ops = &amdgpu_mn_ops[type]; - amn->objects = RB_ROOT_CACHED; - mutex_init(&amn->read_lock); - atomic_set(&amn->recursion, 0); - - r = __mmu_notifier_register(&amn->mn, mm); - if (r) - goto free_amn; - - hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); - -release_locks: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - - return amn; - -free_amn: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - kfree(amn); - - return ERR_PTR(r); -} - /** * amdgpu_mn_register - register a BO for notifier updates * * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an MMU notifier for the given BO at the specified address. + * Registers a mmu_notifier for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { - unsigned long end = addr + amdgpu_bo_size(bo) - 1; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - enum amdgpu_mn_type type = - bo->kfd_bo ? 
AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *amn; - struct amdgpu_mn_node *node = NULL, *new_node; - struct list_head bos; - struct interval_tree_node *it; - - amn = amdgpu_mn_get(adev, type); - if (IS_ERR(amn)) - return PTR_ERR(amn); - - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); - if (!new_node) - return -ENOMEM; - - INIT_LIST_HEAD(&bos); - - down_write(&amn->lock); - - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { - kfree(node); - node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &amn->objects); - addr = min(it->start, addr); - end = max(it->last, end); - list_splice(&node->bos, &bos); - } - - if (!node) - node = new_node; - else - kfree(new_node); - - bo->mn = amn; - - node->it.start = addr; - node->it.last = end; - INIT_LIST_HEAD(&node->bos); - list_splice(&bos, &node->bos); - list_add(&bo->mn_list, &node->bos); - - interval_tree_insert(&node->it, &amn->objects); - - up_write(&amn->lock); - - return 0; + if (bo->kfd_bo) + return mmu_interval_notifier_insert(&bo->notifier, current->mm, + addr, amdgpu_bo_size(bo), + &amdgpu_mn_hsa_ops); + return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + amdgpu_bo_size(bo), + &amdgpu_mn_gfx_ops); } /** @@ -492,39 +146,12 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) * * @bo: amdgpu buffer object * - * Remove any registration of MMU notifier updates from the buffer object. + * Remove any registration of mmu notifier updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *amn; - struct list_head *head; - - mutex_lock(&adev->mn_lock); - - amn = bo->mn; - if (amn == NULL) { - mutex_unlock(&adev->mn_lock); + if (!bo->notifier.mm) return; - } - - down_write(&amn->lock); - - /* save the next list entry for later */ - head = bo->mn_list.next; - - bo->mn = NULL; - list_del_init(&bo->mn_list); - - if (list_empty(head)) { - struct amdgpu_mn_node *node; - - node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &amn->objects); - kfree(node); - } - - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); + mmu_interval_notifier_remove(&bo->notifier); + bo->notifier.mm = NULL; } - diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index eb0f432f78fe..a292238f75eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -24,33 +24,20 @@ #ifndef __AMDGPU_MN_H__ #define __AMDGPU_MN_H__ -/* - * MMU Notifier - */ -struct amdgpu_mn; - -enum amdgpu_mn_type { - AMDGPU_MN_TYPE_GFX, - AMDGPU_MN_TYPE_HSA, -}; +#include <linux/types.h> +#include <linux/hmm.h> +#include <linux/rwsem.h> +#include <linux/workqueue.h> +#include <linux/interval_tree.h> -#if defined(CONFIG_MMU_NOTIFIER) -void amdgpu_mn_lock(struct amdgpu_mn *mn); -void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type); +#if defined(CONFIG_HMM_MIRROR) int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); #else -static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} -static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - return NULL; -} static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { + 
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " + "add CONFIG_ZONE_DEVICE=y in config file to fix this\n"); return -ENODEV; } static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 889e443eeee7..eb9975f4decb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -58,7 +58,7 @@ struct amdgpu_hpd; #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) -#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); +#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base) #define AMDGPU_MAX_HPD_PINS 6 #define AMDGPU_MAX_CRTCS 6 @@ -331,8 +331,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* maximum number of bits per channel for monitor color */ - struct drm_property *max_bpc_property; /* Adaptive Backlight Modulation (power feature) */ struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ @@ -406,7 +404,7 @@ struct amdgpu_crtc { struct amdgpu_flip_work *pflip_works; enum amdgpu_flip_status pflip_status; int deferred_flip_completion; - u64 last_flip_vblank; + u32 last_flip_vblank; /* pll sharing */ struct amdgpu_atom_ss ss; bool ss_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c new file mode 100644 index 000000000000..7d5c3a9de9ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "pcie_bif_err_count", + .debugfs_name = "pcie_bif_err_inject", + }; + + if (!adev->nbio.ras_if) { + adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->nbio.ras_if) + return -ENOMEM; + adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; + adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->nbio.ras_if->sub_block_index = 0; + strcpy(adev->nbio.ras_if->name, "pcie_bif"); + } + ih_info.head = fs_info.head = *adev->nbio.ras_if; + r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); + if (r) + goto late_fini; + r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + return 0; +late_fini: + amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info); +free: + kfree(adev->nbio.ras_if); + adev->nbio.ras_if = NULL; + return r; +} + +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) && + adev->nbio.ras_if) { + struct ras_common_if *ras_if = adev->nbio.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h new file mode 100644 index 000000000000..919bd566ba3c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -0,0 +1,101 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
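/*
 * Sketch (an assumption, not from this patch) of how the helper above is
 * meant to be consumed: an ASIC-specific NBIO backend plugs
 * amdgpu_nbio_ras_late_init() into its amdgpu_nbio_funcs table (declared
 * just below in amdgpu_nbio.h) and the generic code dispatches through
 * adev->nbio.funcs at RAS late-init time.  The example_* names are
 * hypothetical.
 */
#include "amdgpu.h"
#include "amdgpu_nbio.h"

static const struct amdgpu_nbio_funcs example_nbio_funcs = {
	.ras_late_init = amdgpu_nbio_ras_late_init,
};

static int example_nbio_ras_late_init(struct amdgpu_device *adev)
{
	if (adev->nbio.funcs && adev->nbio.funcs->ras_late_init)
		return adev->nbio.funcs->ras_late_init(adev);

	return 0;
}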
+ * + */ +#ifndef __AMDGPU_NBIO_H__ +#define __AMDGPU_NBIO_H__ + +/* + * amdgpu nbio functions + */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; + u32 ref_and_mask_sdma2; + u32 ref_and_mask_sdma3; + u32 ref_and_mask_sdma4; + u32 ref_and_mask_sdma5; + u32 ref_and_mask_sdma6; + u32 ref_and_mask_sdma7; +}; + +struct amdgpu_nbio_funcs { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, + bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*enable_doorbell_interrupt)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); + void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); + void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); + int (*init_ras_controller_interrupt)(struct amdgpu_device *adev); + int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + int (*ras_late_init)(struct amdgpu_device *adev); +}; + +struct amdgpu_nbio { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + struct amdgpu_irq_src ras_controller_irq; + struct amdgpu_irq_src ras_err_event_athub_irq; + struct ras_common_if *ras_if; + const struct amdgpu_nbio_funcs *funcs; +}; + +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev); +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ec9e45004bff..e3f16b49e970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -31,7 +31,7 @@ */ #include <linux/list.h> #include <linux/slab.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include <drm/drm_cache.h> #include "amdgpu.h" @@ -80,20 +80,19 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) if (bo->pin_count > 0) 
amdgpu_bo_subtract_pin_size(bo); - if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_memory_limit(bo); - amdgpu_bo_kunmap(bo); - if (bo->gem_base.import_attach) - drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); - drm_gem_object_release(&bo->gem_base); - amdgpu_bo_unref(&bo->parent); + if (bo->tbo.base.import_attach) + drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); + drm_gem_object_release(&bo->tbo.base); + /* in case amdgpu_device_recover_vram got NULL of bo->parent */ if (!list_empty(&bo->shadow_list)) { mutex_lock(&adev->shadow_list_lock); list_del_init(&bo->shadow_list); mutex_unlock(&adev->shadow_list_lock); } + amdgpu_bo_unref(&bo->parent); + kfree(bo->metadata); kfree(bo); } @@ -247,8 +246,9 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, bp.size = size; bp.byte_align = align; bp.domain = domain; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED + : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; bp.type = ttm_bo_type_kernel; bp.resv = NULL; @@ -343,6 +343,70 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, } /** + * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location + * + * @adev: amdgpu device object + * @offset: offset of the BO + * @size: size of the BO + * @domain: where to place it + * @bo_ptr: used to initialize BOs in structures + * @cpu_addr: optional CPU address mapping + * + * Creates a kernel BO at a specific offset in the address space of the domain. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, uint32_t domain, + struct amdgpu_bo **bo_ptr, void **cpu_addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + unsigned int i; + int r; + + offset &= PAGE_MASK; + size = ALIGN(size, PAGE_SIZE); + + r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, + NULL, cpu_addr); + if (r) + return r; + + /* + * Remove the original mem node and create a new one at the request + * position. 
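 * The helper first drops any CPU mapping left over from
 * amdgpu_bo_create_reserved(), since it points at the old placement, then
 * releases the current mem node and clamps every placement to the page
 * range [offset, offset + size) so that ttm_bo_mem_space() can only
 * satisfy the request at the wanted GPU address; the kmap is re-created
 * once the BO sits at that offset.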
+ */ + if (cpu_addr) + amdgpu_bo_kunmap(*bo_ptr); + + ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem); + + for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) { + (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT; + (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement, + &(*bo_ptr)->tbo.mem, &ctx); + if (r) + goto error; + + if (cpu_addr) { + r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); + if (r) + goto error; + } + + amdgpu_bo_unreserve(*bo_ptr); + return 0; + +error: + amdgpu_bo_unreserve(*bo_ptr); + amdgpu_bo_unref(bo_ptr); + return r; +} + +/** * amdgpu_bo_free_kernel - free BO for kernel use * * @bo: amdgpu BO to free @@ -411,15 +475,50 @@ fail: return false; } +bool amdgpu_bo_support_uswc(u64 bo_flags) +{ + +#ifdef CONFIG_X86_32 + /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit + * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 + */ + return false; +#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) + /* Don't try to enable write-combining when it can't work, or things + * may be slow + * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 + */ + +#ifndef CONFIG_COMPILE_TEST +#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ + thanks to write-combining +#endif + + if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " + "better performance thanks to write-combining\n"); + return false; +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + return false; + + return true; +#endif +} + static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), - .no_wait_gpu = false, + .no_wait_gpu = bp->no_wait_gpu, .resv = bp->resv, - .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT + .flags = bp->type != ttm_bo_type_kernel ? + TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; @@ -452,7 +551,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); + drm_gem_private_object_init(adev->ddev, &bo->tbo.base, size); INIT_LIST_HEAD(&bo->shadow_list); bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? 
bp->preferred_domain : @@ -464,36 +563,15 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->flags = bp->flags; -#ifdef CONFIG_X86_32 - /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit - * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 - */ - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) - /* Don't try to enable write-combining when it can't work, or things - * may be slow - * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 - */ - -#ifndef CONFIG_COMPILE_TEST -#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ - thanks to write-combining -#endif - - if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) - DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " - "better performance thanks to write-combining\n"); - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#else - /* For architectures that don't support WC memory, - * mask out the WC flag from the BO - */ - if (!drm_arch_can_wc_memory()) + if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#endif bo->tbo.bdev = &adev->mman.bdev; - amdgpu_bo_placement_from_domain(bo, bp->domain); + if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | + AMDGPU_GEM_DOMAIN_GDS)) + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + else + amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) bo->tbo.priority = 1; @@ -515,7 +593,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -538,7 +616,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, fail_unreserve: if (!bp->resv) - ww_mutex_unlock(&bo->tbo.resv->lock); + dma_resv_unlock(bo->tbo.base.resv); amdgpu_bo_unref(&bo); return r; } @@ -559,7 +637,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW; bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.resv; + bp.resv = bo->tbo.base.resv; r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); if (!r) { @@ -600,13 +678,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { if (!bp->resv) - WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, + WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv, NULL)); r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); if (!bp->resv) - reservation_object_unlock((*bo_ptr)->tbo.resv); + dma_resv_unlock((*bo_ptr)->tbo.base.resv); if (r) amdgpu_bo_unref(bo_ptr); @@ -703,7 +781,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false, + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; @@ -973,6 +1051,7 @@ static const char *amdgpu_vram_names[] = { "HBM", "DDR3", "DDR4", + "GDDR6", }; /** @@ -1044,7 +1123,10 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma) { - return ttm_fbdev_mmap(vma, &bo->tbo); + if (vma->vm_pgoff != 0) + return -EACCES; + + return ttm_bo_mmap_obj(vma, &bo->tbo); } /** @@ -1080,7 +1162,7 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) */ void 
amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { - lockdep_assert_held(&bo->tbo.resv->lock.base); + dma_resv_assert_held(bo->tbo.base.resv); if (tiling_flags) *tiling_flags = bo->tiling_flags; @@ -1205,6 +1287,42 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, } /** + * amdgpu_bo_move_notify - notification about a BO being released + * @bo: pointer to a buffer object + * + * Wipes VRAM buffers whose contents should not be leaked before the + * memory is released. + */ +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) +{ + struct dma_fence *fence = NULL; + struct amdgpu_bo *abo; + int r; + + if (!amdgpu_bo_is_amdgpu_bo(bo)) + return; + + abo = ttm_to_amdgpu_bo(bo); + + if (abo->kfd_bo) + amdgpu_amdkfd_unreserve_memory_limit(abo); + + if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + return; + + dma_resv_lock(bo->base.resv, NULL); + + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); + if (!WARN_ON(r)) { + amdgpu_bo_fence(abo, fence, false); + dma_fence_put(fence); + } + + dma_resv_unlock(bo->base.resv); +} + +/** * amdgpu_bo_fault_reserve_notify - notification about a memory fault * @bo: pointer to a buffer object * @@ -1276,12 +1394,12 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { - struct reservation_object *resv = bo->tbo.resv; + struct dma_resv *resv = bo->tbo.base.resv; if (shared) - reservation_object_add_shared_fence(resv, fence); + dma_resv_add_shared_fence(resv, fence); else - reservation_object_add_excl_fence(resv, fence); + dma_resv_add_excl_fence(resv, fence); } /** @@ -1301,7 +1419,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false); + amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false); r = amdgpu_sync_wait(&sync, intr); amdgpu_sync_free(&sync); @@ -1321,7 +1439,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); - WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && + WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) && !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 220a6a7b1bc1..36dec51d1ef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -30,6 +30,9 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#ifdef CONFIG_MMU_NOTIFIER +#include <linux/mmu_notifier.h> +#endif #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 @@ -41,7 +44,8 @@ struct amdgpu_bo_param { u32 preferred_domain; u64 flags; enum ttm_bo_type type; - struct reservation_object *resv; + bool no_wait_gpu; + struct dma_resv *resv; }; /* bo virtual addresses in a vm */ @@ -72,6 +76,8 @@ struct amdgpu_bo_va { /* If the mappings are cleared or filled */ bool cleared; + + bool is_xgmi; }; struct amdgpu_bo { @@ -92,17 +98,18 @@ struct amdgpu_bo { /* per VM structure for page tables and with virtual addresses */ struct amdgpu_vm_bo_base *vm_bo; /* Constant after initialization */ - struct drm_gem_object gem_base; struct 
amdgpu_bo *parent; struct amdgpu_bo *shadow; struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; - union { - struct list_head mn_list; - struct list_head shadow_list; - }; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_interval_notifier notifier; +#endif + + struct list_head shadow_list; struct kgd_mem *kfd_bo; }; @@ -153,7 +160,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); int r; - r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL); + r = __ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) dev_err(adev->dev, "%p reserve failed\n", bo); @@ -190,7 +197,7 @@ static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo) */ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) { - return drm_vma_node_offset_addr(&bo->tbo.vma_node); + return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } /** @@ -236,6 +243,9 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, uint32_t domain, + struct amdgpu_bo **bo_ptr, void **cpu_addr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); @@ -263,6 +273,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_mem); +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); @@ -306,5 +317,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); #endif +bool amdgpu_bo_support_uswc(u64 bo_flags); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 8e67c1210d7c..1f2305b7bd13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atom.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index a7adb7b6bd98..f205f56e3358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -22,14 +22,18 @@ * Authors: Rafał Miłecki <zajec5@gmail.com> * Alex Deucher <alexdeucher@gmail.com> */ -#include <drm/drmP.h> + +#include <drm/drm_debugfs.h> + #include "amdgpu.h" #include "amdgpu_drv.h" #include "amdgpu_pm.h" #include "amdgpu_dpm.h" #include "amdgpu_display.h" +#include "amdgpu_smu.h" #include "atom.h" #include <linux/power_supply.h> +#include <linux/pci.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> @@ -63,9 +67,21 @@ static const struct cg_flag_name clocks[] = { {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"}, {AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"}, {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"}, + + {AMD_CG_SUPPORT_ATHUB_MGCG, "Address Translation Hub Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_ATHUB_LS, "Address Translation Hub Light Sleep"}, {0, NULL}, }; +static const struct hwmon_temp_label { + enum PP_HWMON_TEMP channel; + const char *label; +} temp_label[] = { + {PP_TEMP_EDGE, "edge"}, + {PP_TEMP_JUNCTION, "junction"}, + {PP_TEMP_MEM, "mem"}, +}; + void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) { if (adev->pm.dpm_enabled) { @@ -80,6 +96,27 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) } } +int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor, + void *data, uint32_t *size) +{ + int ret = 0; + + if (!data || !size) + return -EINVAL; + + if (is_support_sw_smu(adev)) + ret = smu_read_sensor(&adev->smu, sensor, data, size); + else { + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) + ret = adev->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, + sensor, data, size); + else + ret = -EINVAL; + } + + return ret; +} + /** * DOC: power_dpm_state * @@ -122,10 +159,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; - if (adev->powerplay.pp_funcs->get_current_power_state) + if (is_support_sw_smu(adev)) { + if (adev->smu.ppt_funcs->get_current_power_state) + pm = smu_get_current_power_state(&adev->smu); + else + pm = adev->pm.dpm.user_state; + } else if (adev->powerplay.pp_funcs->get_current_power_state) { pm = amdgpu_dpm_get_current_power_state(adev); - else + } else { pm = adev->pm.dpm.user_state; + } return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? 
"battery" : @@ -152,7 +195,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, goto fail; } - if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm.user_state = state; + mutex_unlock(&adev->pm.mutex); + } else if (adev->powerplay.pp_funcs->dispatch_tasks) { amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state); } else { mutex_lock(&adev->pm.mutex); @@ -236,11 +283,16 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + if (amdgpu_sriov_vf(adev)) + return 0; + + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return snprintf(buf, PAGE_SIZE, "off\n"); - if (adev->powerplay.pp_funcs->get_performance_level) + if (is_support_sw_smu(adev)) + level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) level = amdgpu_dpm_get_performance_level(adev); else level = adev->pm.dpm.forced_level; @@ -273,9 +325,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_performance_level) - current_level = amdgpu_dpm_get_performance_level(adev); - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -299,10 +348,42 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, goto fail; } + /* handle sriov case here */ + if (amdgpu_sriov_vf(adev)) { + if (amdgim_is_hwperf(adev) && + adev->virt.ops->force_dpm_level) { + mutex_lock(&adev->pm.mutex); + adev->virt.ops->force_dpm_level(adev, level); + mutex_unlock(&adev->pm.mutex); + return count; + } else { + return -EINVAL; + } + } + + if (is_support_sw_smu(adev)) + current_level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) + current_level = amdgpu_dpm_get_performance_level(adev); + if (current_level == level) return count; - if (adev->powerplay.pp_funcs->force_performance_level) { + /* profile_exit setting is valid only when current mode is in profile mode */ + if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && + (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { + pr_err("Currently not in any profile mode!\n"); + return -EINVAL; + } + + if (is_support_sw_smu(adev)) { + ret = smu_force_performance_level(&adev->smu, level); + if (ret) + count = -EINVAL; + } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { count = -EINVAL; @@ -328,9 +409,13 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; struct pp_states_info data; - int i, buf_len; + int i, buf_len, ret; - if (adev->powerplay.pp_funcs->get_pp_num_states) + if (is_support_sw_smu(adev)) { + ret = smu_get_power_num_states(&adev->smu, &data); + if (ret) + return ret; + } else if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); @@ -351,23 
+436,29 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; struct pp_states_info data; + struct smu_context *smu = &adev->smu; enum amd_pm_state_type pm = 0; - int i = 0; + int i = 0, ret = 0; - if (adev->powerplay.pp_funcs->get_current_power_state + if (is_support_sw_smu(adev)) { + pm = smu_get_current_power_state(smu); + ret = smu_get_power_num_states(smu, &data); + if (ret) + return ret; + } else if (adev->powerplay.pp_funcs->get_current_power_state && adev->powerplay.pp_funcs->get_pp_num_states) { pm = amdgpu_dpm_get_current_power_state(adev); amdgpu_dpm_get_pp_num_states(adev, &data); + } - for (i = 0; i < data.nums; i++) { - if (pm == data.states[i]) - break; - } - - if (i == data.nums) - i = -EINVAL; + for (i = 0; i < data.nums; i++) { + if (pm == data.states[i]) + break; } + if (i == data.nums) + i = -EINVAL; + return snprintf(buf, PAGE_SIZE, "%d\n", i); } @@ -397,6 +488,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, if (strlen(buf) == 1) adev->pp_force_state_enabled = false; + else if (is_support_sw_smu(adev)) + adev->pp_force_state_enabled = false; else if (adev->powerplay.pp_funcs->dispatch_tasks && adev->powerplay.pp_funcs->get_pp_num_states) { struct pp_states_info data; @@ -442,7 +535,12 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, char *table = NULL; int size; - if (adev->powerplay.pp_funcs->get_pp_table) + if (is_support_sw_smu(adev)) { + size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + if (size < 0) + return size; + } + else if (adev->powerplay.pp_funcs->get_pp_table) size = amdgpu_dpm_get_pp_table(adev, &table); else return 0; @@ -462,8 +560,13 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + int ret = 0; - if (adev->powerplay.pp_funcs->set_pp_table) + if (is_support_sw_smu(adev)) { + ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); + if (ret) + return ret; + } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); return count; @@ -586,19 +689,29 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } - if (adev->powerplay.pp_funcs->odn_edit_dpm_table) - ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, - parameter, parameter_size); - - if (ret) - return -EINVAL; + if (is_support_sw_smu(adev)) { + ret = smu_od_edit_dpm_table(&adev->smu, type, + parameter, parameter_size); - if (type == PP_OD_COMMIT_DPM_TABLE) { - if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); - return count; - } else { + if (ret) return -EINVAL; + } else { + if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { + ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, + parameter, parameter_size); + if (ret) + return -EINVAL; + } + + if (type == PP_OD_COMMIT_DPM_TABLE) { + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, + AMD_PP_TASK_READJUST_POWER_STATE, + NULL); + return count; + } else { + return -EINVAL; + } } } @@ -613,7 +726,13 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; uint32_t size = 0; - if (adev->powerplay.pp_funcs->print_clock_levels) { + if (is_support_sw_smu(adev)) { + size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); + size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); + size += 
smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); + size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); + return size; + } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); @@ -626,10 +745,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, } /** - * DOC: ppfeatures + * DOC: pp_features * * The amdgpu driver provides a sysfs API for adjusting what powerplay - * features to be enabled. The file ppfeatures is used for this. And + * features to be enabled. The file pp_features is used for this. And * this is only available for Vega10 and later dGPUs. * * Reading back the file will show you the followings: @@ -641,7 +760,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, * the corresponding bit from original ppfeature masks and input the * new ppfeature masks. */ -static ssize_t amdgpu_set_ppfeature_status(struct device *dev, +static ssize_t amdgpu_set_pp_feature_status(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -657,7 +776,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); - if (adev->powerplay.pp_funcs->set_ppfeature_status) { + if (is_support_sw_smu(adev)) { + ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); + if (ret) + return -EINVAL; + } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); if (ret) return -EINVAL; @@ -666,22 +789,23 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, return count; } -static ssize_t amdgpu_get_ppfeature_status(struct device *dev, +static ssize_t amdgpu_get_pp_feature_status(struct device *dev, struct device_attribute *attr, char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->get_ppfeature_status) + if (is_support_sw_smu(adev)) { + return smu_sys_get_pp_feature_mask(&adev->smu, buf); + } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk - * pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, @@ -697,9 +821,15 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, * * To manually adjust these states, first select manual using * power_dpm_force_performance_level. - * Secondly,Enter a new value for each level by inputing a string that + * Secondly, enter a new value for each level by inputing a string that * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie" - * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6. + * E.g., + * + * .. code-block:: bash + * + * echo "4 5 6" > pp_dpm_sclk + * + * will enable sclk levels 4, 5, and 6. 
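For reference, a minimal userspace sketch (not part of this patch) of the flow the documentation above describes: switch to manual performance level, write a level mask, then read the table back. The /sys/class/drm/card0/device path and the need for root privileges are assumptions for illustration only, not something this commit defines.

.. code-block:: c

    /* Userspace illustration only; sysfs path and card index are assumed. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static int write_str(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            ssize_t n = write(fd, val, strlen(val));
            close(fd);
            return n < 0 ? -1 : 0;
    }

    int main(void)
    {
            const char *base = "/sys/class/drm/card0/device";
            char path[256], buf[4096];
            int fd;
            ssize_t n;

            /* manual mode is required before individual levels can be forced */
            snprintf(path, sizeof(path), "%s/power_dpm_force_performance_level", base);
            if (write_str(path, "manual"))
                    return 1;

            /* enable sclk levels 4, 5 and 6, as in the example above */
            snprintf(path, sizeof(path), "%s/pp_dpm_sclk", base);
            if (write_str(path, "4 5 6"))
                    return 1;

            /* read back the level table; the active level is marked with '*' */
            fd = open(path, O_RDONLY);
            if (fd < 0)
                    return 1;
            n = read(fd, buf, sizeof(buf) - 1);
            if (n > 0) {
                    buf[n] = '\0';
                    fputs(buf, stdout);
            }
            close(fd);
            return 0;
    }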
* * NOTE: change to the dcefclk max dpm level is not supported now */ @@ -711,7 +841,13 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && + adev->virt.ops->get_pp_clk) + return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); + + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -763,11 +899,16 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev)) + return 0; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); if (ret) @@ -783,7 +924,13 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && + adev->virt.ops->get_pp_clk) + return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); + + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -799,11 +946,16 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev)) + return 0; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); if (ret) @@ -819,7 +971,9 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -839,7 +993,9 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); if (ret) @@ -855,7 +1011,9 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, 
PP_FCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -875,7 +1033,9 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); if (ret) @@ -891,7 +1051,9 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -911,7 +1073,9 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); if (ret) @@ -927,7 +1091,9 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -947,7 +1113,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); if (ret) @@ -964,7 +1132,9 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; - if (adev->powerplay.pp_funcs->get_sclk_od) + if (is_support_sw_smu(adev)) + value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); + else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); return snprintf(buf, PAGE_SIZE, "%d\n", value); @@ -986,14 +1156,19 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, count = -EINVAL; goto fail; } - if (adev->powerplay.pp_funcs->set_sclk_od) - amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); - if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + if (is_support_sw_smu(adev)) { + value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); } else { - adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; - amdgpu_pm_compute_clocks(adev); + if (adev->powerplay.pp_funcs->set_sclk_od) + amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); + + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + } else { + adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; + amdgpu_pm_compute_clocks(adev); + } } fail: @@ -1008,7 +1183,9 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; 
- if (adev->powerplay.pp_funcs->get_mclk_od) + if (is_support_sw_smu(adev)) + value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); + else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); return snprintf(buf, PAGE_SIZE, "%d\n", value); @@ -1030,14 +1207,19 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, count = -EINVAL; goto fail; } - if (adev->powerplay.pp_funcs->set_mclk_od) - amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); - if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + if (is_support_sw_smu(adev)) { + value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); } else { - adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; - amdgpu_pm_compute_clocks(adev); + if (adev->powerplay.pp_funcs->set_mclk_od) + amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); + + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + } else { + adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; + amdgpu_pm_compute_clocks(adev); + } } fail: @@ -1071,7 +1253,9 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->get_power_profile_mode) + if (is_support_sw_smu(adev)) + return smu_get_power_profile_mode(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_power_profile_mode) return amdgpu_dpm_get_power_profile_mode(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); @@ -1121,9 +1305,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, } } parameter[parameter_size] = profile_mode; - if (adev->powerplay.pp_funcs->set_power_profile_mode) + if (is_support_sw_smu(adev)) + ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); + else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); - if (!ret) return count; fail: @@ -1146,14 +1331,36 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + +/** + * DOC: mem_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the VRAM + * is as a percentage. The file mem_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. + */ +static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int r, value, size = sizeof(value); + + /* read the IP busy sensor */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, + (void *)&value, &size); + if (r) return r; @@ -1185,6 +1392,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, count0, count1, pcie_get_mps(adev->pdev)); } +/** + * DOC: unique_id + * + * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU + * The file unique_id is used for this. 
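As a consumer-side sketch only (not part of this patch), the attribute documented above can be read and parsed as a 16-digit hex value; the /sys/class/drm/cardN/device location is an assumption for illustration.

.. code-block:: c

    /* Userspace illustration only; the sysfs path is assumed. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static int read_unique_id(const char *card, uint64_t *id)
    {
            char path[128];
            FILE *f;
            int ok;

            snprintf(path, sizeof(path), "/sys/class/drm/%s/device/unique_id", card);
            f = fopen(path, "r");
            if (!f)
                    return -1;      /* file is absent on unsupported ASICs */
            ok = fscanf(f, "%" SCNx64, id) == 1;
            fclose(f);
            return ok ? 0 : -1;
    }

    int main(void)
    {
            uint64_t id;

            if (!read_unique_id("card0", &id))
                    printf("GPU unique_id: %016" PRIx64 "\n", id);
            return 0;
    }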
+ * This will provide a Unique ID that will persist from machine to machine + * + * NOTE: This will only work for GFX9 and newer. This file will be absent + * on unsupported ASICs (GFX8 and older) + */ +static ssize_t amdgpu_get_unique_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->unique_id) + return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); + + return 0; +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -1229,10 +1459,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_set_pp_od_clk_voltage); static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(mem_busy_percent, S_IRUGO, + amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); -static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, - amdgpu_get_ppfeature_status, - amdgpu_set_ppfeature_status); +static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, + amdgpu_get_pp_feature_status, + amdgpu_set_pp_feature_status); +static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -1240,23 +1473,40 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; - int r, temp, size = sizeof(temp); + int channel = to_sensor_dev_attr(attr)->index; + int r, temp = 0, size = sizeof(temp); /* Can't get temperature when the card is off */ if ((adev->flags & AMD_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) + if (channel >= PP_TEMP_MAX) return -EINVAL; - /* get the temperature */ - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, - (void *)&temp, &size); - if (r) - return r; + switch (channel) { + case PP_TEMP_JUNCTION: + /* get current junction temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_EDGE: + /* get current edge temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_MEM: + /* get current memory temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + } return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1277,17 +1527,90 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_hotspot_temp; + else + temp = adev->pm.dpm.thermal.max_hotspot_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = 
to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_mem_temp; + else + temp = adev->pm.dpm.thermal.max_mem_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int channel = to_sensor_dev_attr(attr)->index; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label); +} + +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int channel = to_sensor_dev_attr(attr)->index; + int temp = 0; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; + break; + case PP_TEMP_EDGE: + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; + break; + case PP_TEMP_MEM: + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; + break; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + if (is_support_sw_smu(adev)) { + pwm_mode = smu_get_fan_control_mode(&adev->smu); + } else { + if (!adev->powerplay.pp_funcs->get_fan_control_mode) + return -EINVAL; - if (!adev->powerplay.pp_funcs->get_fan_control_mode) - return -EINVAL; - - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + } return sprintf(buf, "%i\n", pwm_mode); } @@ -1306,14 +1629,18 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!adev->powerplay.pp_funcs->set_fan_control_mode) - return -EINVAL; - err = kstrtoint(buf, 10, &value); if (err) return err; - amdgpu_dpm_set_fan_control_mode(adev, value); + if (is_support_sw_smu(adev)) { + smu_set_fan_control_mode(&adev->smu, value); + } else { + if (!adev->powerplay.pp_funcs->set_fan_control_mode) + return -EINVAL; + + amdgpu_dpm_set_fan_control_mode(adev, value); + } return count; } @@ -1345,8 +1672,10 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, if ((adev->flags & AMD_IS_PX) && (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (is_support_sw_smu(adev)) + pwm_mode = smu_get_fan_control_mode(&adev->smu); + else + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); return -EINVAL; @@ -1358,7 +1687,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, value = (value * 100) / 255; - if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + if (is_support_sw_smu(adev)) { + err = smu_set_fan_speed_percent(&adev->smu, value); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { err = amdgpu_dpm_set_fan_speed_percent(adev, value); if (err) return err; @@ -1380,7 +1713,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + if (is_support_sw_smu(adev)) { + err = smu_get_fan_speed_percent(&adev->smu, &speed); + if (err) + return err; + } else if 
(adev->powerplay.pp_funcs->get_fan_speed_percent) { err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); if (err) return err; @@ -1404,7 +1741,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + if (is_support_sw_smu(adev)) { + err = smu_get_fan_speed_rpm(&adev->smu, &speed); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); if (err) return err; @@ -1422,9 +1763,6 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; - if (!adev->powerplay.pp_funcs->read_sensor) - return -EINVAL; - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); if (r) @@ -1442,9 +1780,6 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; - if (!adev->powerplay.pp_funcs->read_sensor) - return -EINVAL; - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); if (r) @@ -1466,7 +1801,11 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + if (is_support_sw_smu(adev)) { + err = smu_get_fan_speed_rpm(&adev->smu, &rpm); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); if (err) return err; @@ -1484,7 +1823,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (is_support_sw_smu(adev)) + pwm_mode = smu_get_fan_control_mode(&adev->smu); + else + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) return -ENODATA; @@ -1497,7 +1840,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, if (err) return err; - if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { + if (is_support_sw_smu(adev)) { + err = smu_set_fan_speed_rpm(&adev->smu, value); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { err = amdgpu_dpm_set_fan_speed_rpm(adev, value); if (err) return err; @@ -1513,11 +1860,14 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; - if (!adev->powerplay.pp_funcs->get_fan_control_mode) - return -EINVAL; - - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (is_support_sw_smu(adev)) { + pwm_mode = smu_get_fan_control_mode(&adev->smu); + } else { + if (!adev->powerplay.pp_funcs->get_fan_control_mode) + return -EINVAL; + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + } return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 
0 : 1); } @@ -1536,8 +1886,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!adev->powerplay.pp_funcs->set_fan_control_mode) - return -EINVAL; err = kstrtoint(buf, 10, &value); if (err) @@ -1550,7 +1898,13 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; - amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); + if (is_support_sw_smu(adev)) { + smu_set_fan_control_mode(&adev->smu, pwm_mode); + } else { + if (!adev->powerplay.pp_funcs->set_fan_control_mode) + return -EINVAL; + amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); + } return count; } @@ -1569,11 +1923,6 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); @@ -1608,11 +1957,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); @@ -1644,11 +1988,6 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); @@ -1675,7 +2014,10 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + if (is_support_sw_smu(adev)) { + smu_get_power_limit(&adev->smu, &limit, true, true); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { @@ -1690,7 +2032,10 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + if (is_support_sw_smu(adev)) { + smu_get_power_limit(&adev->smu, &limit, false, true); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { @@ -1713,14 +2058,18 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return err; value = value / 1000000; /* convert to Watt */ - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + + if (is_support_sw_smu(adev)) { + err = smu_set_power_limit(&adev->smu, value); + } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { err = 
adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - if (err) - return err; } else { - return -EINVAL; + err = -EINVAL; } + if (err) + return err; + return count; } @@ -1738,11 +2087,6 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); @@ -1773,11 +2117,6 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); @@ -1815,11 +2154,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * hwmon interfaces for GPU temperature: * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + * - temp2_input and temp3_input are supported on SOC15 dGPUs only * - * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp[1-3]_label: temperature channel label + * - temp2_label and temp3_label are supported on SOC15 dGPUs only * - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius + * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only + * + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only + * + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + * - these are supported on SOC15 dGPUs only * * hwmon interfaces for GPU voltage: * @@ -1853,9 +2201,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * - fan1_input: fan speed in RPM * - * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM) + * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM) * - * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable + * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable * * hwmon interfaces for GPU clocks: * @@ -1867,9 +2215,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, 
amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -1896,6 +2256,18 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_crit.dev_attr.attr, + &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_crit.dev_attr.attr, + &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_emergency.dev_attr.attr, + &sensor_dev_attr_temp2_emergency.dev_attr.attr, + &sensor_dev_attr_temp3_emergency.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -1967,39 +2339,51 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; - /* mask fan attributes if we have no bindings for this asic to expose */ - if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && - attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ - (!adev->powerplay.pp_funcs->get_fan_control_mode && - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ - effective_mode &= ~S_IRUGO; + if (!is_support_sw_smu(adev)) { + /* mask fan attributes if we have no bindings for this asic to expose */ + if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && + attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ + (!adev->powerplay.pp_funcs->get_fan_control_mode && + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ + effective_mode &= ~S_IRUGO; - if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && - attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ - (!adev->powerplay.pp_funcs->set_fan_control_mode && - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ - effective_mode &= ~S_IWUSR; + if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && + attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ + (!adev->powerplay.pp_funcs->set_fan_control_mode && + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ + effective_mode &= ~S_IWUSR; + } - if ((adev->flags & AMD_IS_APU) && + if (((adev->flags & AMD_IS_APU) || + adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ + adev->family == AMDGPU_FAMILY_KV) && /* not 
implemented yet */ (attr == &sensor_dev_attr_power1_average.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0; - /* hide max/min values if we can't both query and manage the fan */ - if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && - !adev->powerplay.pp_funcs->get_fan_speed_percent) && - (!adev->powerplay.pp_funcs->set_fan_speed_rpm && - !adev->powerplay.pp_funcs->get_fan_speed_rpm) && - (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; - - if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm && - !adev->powerplay.pp_funcs->get_fan_speed_rpm) && - (attr == &sensor_dev_attr_fan1_max.dev_attr.attr || - attr == &sensor_dev_attr_fan1_min.dev_attr.attr)) + if (!is_support_sw_smu(adev)) { + /* hide max/min values if we can't both query and manage the fan */ + if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && + !adev->powerplay.pp_funcs->get_fan_speed_percent) && + (!adev->powerplay.pp_funcs->set_fan_speed_rpm && + !adev->powerplay.pp_funcs->get_fan_speed_rpm) && + (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) + return 0; + + if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm && + !adev->powerplay.pp_funcs->get_fan_speed_rpm) && + (attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr)) + return 0; + } + + if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ + adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ + (attr == &sensor_dev_attr_in0_input.dev_attr.attr || + attr == &sensor_dev_attr_in0_label.dev_attr.attr)) return 0; /* only APUs have vddnb */ @@ -2014,6 +2398,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) return 0; + /* only SOC15 dGPUs support hotspot and mem temperatures */ + if (((adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_VEGA10) && + (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || + attr == &sensor_dev_attr_temp3_input.dev_attr.attr || + attr == &sensor_dev_attr_temp2_label.dev_attr.attr || + attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) + return 0; + return effective_mode; } @@ -2039,9 +2439,7 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) if (!adev->pm.dpm_enabled) return; - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor && - !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, &size)) { if (temp < adev->pm.dpm.thermal.min_temp) /* switch back the user state */ @@ -2267,7 +2665,13 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { - if (adev->powerplay.pp_funcs->set_powergating_by_smu) { + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable); + if 
(ret) + DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n", + enable ? "true" : "false", ret); + } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) { /* enable/disable UVD */ mutex_lock(&adev->pm.mutex); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); @@ -2288,7 +2692,13 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { - if (adev->powerplay.pp_funcs->set_powergating_by_smu) { + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable); + if (ret) + DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n", + enable ? "true" : "false", ret); + } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) { /* enable/disable VCE */ mutex_lock(&adev->pm.mutex); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); @@ -2308,6 +2718,59 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) } +int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev) +{ + int ret = 0; + + if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) + return ret; + + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_sclk\n"); + return ret; + } + + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_mclk\n"); + return ret; + } + + ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level); + if (ret) { + DRM_ERROR("failed to create device file for dpm state\n"); + return ret; + } + + return ret; +} + +void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev) +{ + if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) + return; + + device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); + device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); + device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); +} + +int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) +{ + int r; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { + r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); + if (r) { + pr_err("smu firmware loading failed\n"); + return r; + } + *smu_version = adev->pm.fw_version; + } + return 0; +} + int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; @@ -2367,6 +2830,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_sclk\n"); return ret; } + + /* Arcturus does not support standalone mclk/socclk/fclk level setting */ + if (adev->asic_type == CHIP_ARCTURUS) { + dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO; + dev_attr_pp_dpm_mclk.store = NULL; + + dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO; + dev_attr_pp_dpm_socclk.store = NULL; + + dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO; + dev_attr_pp_dpm_fclk.store = NULL; + } + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); if (ret) { DRM_ERROR("failed to create device file pp_dpm_mclk\n"); @@ -2378,10 +2854,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_socclk\n"); return ret; } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); - return ret; + if (adev->asic_type != CHIP_ARCTURUS) { + ret = 
device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); + return ret; + } } } if (adev->asic_type >= CHIP_VEGA20) { @@ -2391,10 +2869,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_pcie\n"); - return ret; + if (adev->asic_type != CHIP_ARCTURUS) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_pcie\n"); + return ret; + } } ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od); if (ret) { @@ -2413,7 +2893,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "pp_power_profile_mode\n"); return ret; } - if (hwmgr->od_enabled) { + if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || + (!is_support_sw_smu(adev) && hwmgr->od_enabled)) { ret = device_create_file(adev->dev, &dev_attr_pp_od_clk_voltage); if (ret) { @@ -2429,6 +2910,17 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "gpu_busy_level\n"); return ret; } + /* APU does not have its own dedicated memory */ + if (!(adev->flags & AMD_IS_APU) && + (adev->asic_type != CHIP_VEGA10)) { + ret = device_create_file(adev->dev, + &dev_attr_mem_busy_percent); + if (ret) { + DRM_ERROR("failed to create device file " + "mem_busy_percent\n"); + return ret; + } + } /* PCIe Perf counters won't work on APU nodes */ if (!(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, &dev_attr_pcie_bw); @@ -2437,6 +2929,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + if (adev->unique_id) + ret = device_create_file(adev->dev, &dev_attr_unique_id); + if (ret) { + DRM_ERROR("failed to create device file unique_id\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -2446,10 +2944,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, - &dev_attr_ppfeatures); + &dev_attr_pp_features); if (ret) { DRM_ERROR("failed to create device file " - "ppfeatures\n"); + "pp_features\n"); return ret; } } @@ -2480,24 +2978,32 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); if (adev->asic_type >= CHIP_VEGA10) { device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); - device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (adev->asic_type != CHIP_ARCTURUS) + device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); } - device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (adev->asic_type != CHIP_ARCTURUS) + device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); if (adev->asic_type >= CHIP_VEGA20) device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); device_remove_file(adev->dev, &dev_attr_pp_sclk_od); device_remove_file(adev->dev, &dev_attr_pp_mclk_od); device_remove_file(adev->dev, &dev_attr_pp_power_profile_mode); - if (hwmgr->od_enabled) + if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || + (!is_support_sw_smu(adev) && hwmgr->od_enabled)) device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); + if (!(adev->flags & AMD_IS_APU) && + (adev->asic_type != CHIP_VEGA10)) + device_remove_file(adev->dev, &dev_attr_mem_busy_percent); if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, 
&dev_attr_pcie_bw); + if (adev->unique_id) + device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) - device_remove_file(adev->dev, &dev_attr_ppfeatures); + device_remove_file(adev->dev, &dev_attr_pp_features); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -2516,28 +3022,36 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) amdgpu_fence_wait_empty(ring); } - if (adev->powerplay.pp_funcs->dispatch_tasks) { - if (!amdgpu_device_has_dc_support(adev)) { + if (is_support_sw_smu(adev)) { + struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm; + smu_handle_task(&adev->smu, + smu_dpm->dpm_level, + AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, + true); + } else { + if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (!amdgpu_device_has_dc_support(adev)) { + mutex_lock(&adev->pm.mutex); + amdgpu_dpm_get_active_displays(adev); + adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; + adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); + adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); + /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ + if (adev->pm.pm_display_cfg.vrefresh > 120) + adev->pm.pm_display_cfg.min_vblank_time = 0; + if (adev->powerplay.pp_funcs->display_configuration_change) + adev->powerplay.pp_funcs->display_configuration_change( + adev->powerplay.pp_handle, + &adev->pm.pm_display_cfg); + mutex_unlock(&adev->pm.mutex); + } + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); + } else { mutex_lock(&adev->pm.mutex); amdgpu_dpm_get_active_displays(adev); - adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; - adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); - adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); - /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. 
*/ - if (adev->pm.pm_display_cfg.vrefresh > 120) - adev->pm.pm_display_cfg.min_vblank_time = 0; - if (adev->powerplay.pp_funcs->display_configuration_change) - adev->powerplay.pp_funcs->display_configuration_change( - adev->powerplay.pp_handle, - &adev->pm.pm_display_cfg); + amdgpu_dpm_change_power_state_locked(adev); mutex_unlock(&adev->pm.mutex); } - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); - } else { - mutex_lock(&adev->pm.mutex); - amdgpu_dpm_get_active_displays(adev); - amdgpu_dpm_change_power_state_locked(adev); - mutex_unlock(&adev->pm.mutex); } } @@ -2553,11 +3067,6 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a uint32_t query = 0; int size; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* GPU Clocks */ size = sizeof(value); seq_printf(m, "GFX Clocks and Power:\n"); @@ -2586,34 +3095,54 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* GPU Load */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) seq_printf(m, "GPU Load: %u %%\n", value); + /* MEM Load */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size)) + seq_printf(m, "MEM Load: %u %%\n", value); + seq_printf(m, "\n"); /* SMC feature mask */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size)) seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); - /* UVD clocks */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { - if (!value) { - seq_printf(m, "UVD: Disabled\n"); - } else { - seq_printf(m, "UVD: Enabled\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) - seq_printf(m, "\t%u MHz (DCLK)\n", value/100); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) - seq_printf(m, "\t%u MHz (VCLK)\n", value/100); + if (adev->asic_type > CHIP_VEGA20) { + /* VCN clocks */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { + if (!value) { + seq_printf(m, "VCN: Disabled\n"); + } else { + seq_printf(m, "VCN: Enabled\n"); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (DCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (VCLK)\n", value/100); + } } - } - seq_printf(m, "\n"); - - /* VCE clocks */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { - if (!value) { - seq_printf(m, "VCE: Disabled\n"); - } else { - seq_printf(m, "VCE: Enabled\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) - seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); + seq_printf(m, "\n"); + } else { + /* UVD clocks */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { + if (!value) { + seq_printf(m, "UVD: Disabled\n"); + } else { + seq_printf(m, "UVD: Enabled\n"); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (DCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (VCLK)\n", value/100); + } + } + seq_printf(m, "\n"); + + /* VCE clocks */ + if (!amdgpu_dpm_read_sensor(adev, 
AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { + if (!value) { + seq_printf(m, "VCE: Disabled\n"); + } else { + seq_printf(m, "VCE: Enabled\n"); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); + } } } @@ -2649,7 +3178,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if ((adev->flags & AMD_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { seq_printf(m, "PX asic powered off\n"); - } else if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index f21a7716b90e..ef31448ee8d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -32,8 +32,11 @@ struct cg_flag_name void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); int amdgpu_pm_sysfs_init(struct amdgpu_device *adev); +int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev); void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev); +void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev); void amdgpu_pm_print_power_states(struct amdgpu_device *adev); +int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); void amdgpu_pm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_thermal_work_handler(struct work_struct *work); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c new file mode 100644 index 000000000000..0e6dba9f60f0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -0,0 +1,280 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Jonathan Kim <jonathan.kim@amd.com> + * + */ + +#include <linux/perf_event.h> +#include <linux/init.h> +#include "amdgpu.h" +#include "amdgpu_pmu.h" +#include "df_v3_6.h" + +#define PMU_NAME_SIZE 32 + +/* record to keep track of pmu entry per pmu type per device */ +struct amdgpu_pmu_entry { + struct list_head entry; + struct amdgpu_device *adev; + struct pmu pmu; + unsigned int pmu_perf_type; +}; + +static LIST_HEAD(amdgpu_pmu_list); + + +/* initialize perf counter */ +static int amdgpu_perf_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* test the event attr type check for PMU enumeration */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* update the hw_perf_event struct with config data */ + hwc->conf = event->attr.config; + + return 0; +} + +/* start perf counter */ +static void amdgpu_perf_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + if (!(flags & PERF_EF_RELOAD)) + pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + + pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + break; + default: + break; + } + + perf_event_update_userpage(event); + +} + +/* read perf counter */ +static void amdgpu_perf_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + u64 count, prev; + + do { + prev = local64_read(&hwc->prev_count); + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + &count); + break; + default: + count = 0; + break; + }; + } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev); + + local64_add(count - prev, &event->count); +} + +/* stop perf counter */ +static void amdgpu_perf_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + if (hwc->state & PERF_HES_UPTODATE) + return; + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + break; + default: + break; + }; + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + amdgpu_perf_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +/* add perf counter */ +static int amdgpu_perf_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int retval; + + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + break; + default: + return 0; + }; + + if (retval) + return retval; + + if (flags & PERF_EF_START) + amdgpu_perf_start(event, PERF_EF_RELOAD); + + return retval; + +} + +/* delete perf counter */ +static void amdgpu_perf_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + 
amdgpu_perf_stop(event, PERF_EF_UPDATE); + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + break; + default: + break; + }; + + perf_event_update_userpage(event); +} + +/* vega20 pmus */ + +/* init pmu tracking per pmu type */ +static int init_pmu_by_type(struct amdgpu_device *adev, + const struct attribute_group *attr_groups[], + char *pmu_type_name, char *pmu_file_prefix, + unsigned int pmu_perf_type, + unsigned int num_counters) +{ + char pmu_name[PMU_NAME_SIZE]; + struct amdgpu_pmu_entry *pmu_entry; + int ret = 0; + + pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL); + + if (!pmu_entry) + return -ENOMEM; + + pmu_entry->adev = adev; + pmu_entry->pmu = (struct pmu){ + .event_init = amdgpu_perf_event_init, + .add = amdgpu_perf_add, + .del = amdgpu_perf_del, + .start = amdgpu_perf_start, + .stop = amdgpu_perf_stop, + .read = amdgpu_perf_read, + .task_ctx_nr = perf_invalid_context, + }; + + pmu_entry->pmu.attr_groups = attr_groups; + pmu_entry->pmu_perf_type = pmu_perf_type; + snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", + pmu_file_prefix, adev->ddev->primary->index); + + ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1); + + if (ret) { + kfree(pmu_entry); + pr_warn("Error initializing AMDGPU %s PMUs.\n", pmu_type_name); + return ret; + } + + pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n", + pmu_type_name, num_counters); + + list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list); + + return 0; +} + +/* init amdgpu_pmu */ +int amdgpu_pmu_init(struct amdgpu_device *adev) +{ + int ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + /* init df */ + ret = init_pmu_by_type(adev, df_v3_6_attr_groups, + "DF", "amdgpu_df", PERF_TYPE_AMDGPU_DF, + DF_V3_6_MAX_COUNTERS); + + /* other pmu types go here*/ + break; + default: + return 0; + } + + return 0; +} + + +/* destroy all pmu data associated with target device */ +void amdgpu_pmu_fini(struct amdgpu_device *adev) +{ + struct amdgpu_pmu_entry *pe, *temp; + + list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) { + if (pe->adev == adev) { + list_del(&pe->entry); + perf_pmu_unregister(&pe->pmu); + kfree(pe); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h new file mode 100644 index 000000000000..7dddb7160a11 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Jonathan Kim <jonathan.kim@amd.com> + * + */ + +#ifndef _AMDGPU_PMU_H_ +#define _AMDGPU_PMU_H_ + +enum amdgpu_pmu_perf_type { + PERF_TYPE_AMDGPU_DF = 0, + PERF_TYPE_AMDGPU_MAX +}; + +int amdgpu_pmu_init(struct amdgpu_device *adev); +void amdgpu_pmu_fini(struct amdgpu_device *adev); + +#endif /* _AMDGPU_PMU_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3091488cd8cc..2770cba56a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -24,7 +24,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" @@ -32,34 +32,42 @@ #include "psp_v3_1.h" #include "psp_v10_0.h" #include "psp_v11_0.h" +#include "psp_v12_0.h" + +#include "amdgpu_ras.h" static void psp_set_funcs(struct amdgpu_device *adev); static int psp_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; psp_set_funcs(adev); - return 0; -} - -static int psp_sw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct psp_context *psp = &adev->psp; - int ret; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: psp_v3_1_set_psp_funcs(psp); + psp->autoload_supported = false; break; case CHIP_RAVEN: psp_v10_0_set_psp_funcs(psp); + psp->autoload_supported = false; break; case CHIP_VEGA20: + case CHIP_ARCTURUS: psp_v11_0_set_psp_funcs(psp); + psp->autoload_supported = false; + break; + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + psp_v11_0_set_psp_funcs(psp); + psp->autoload_supported = true; + break; + case CHIP_RENOIR: + psp_v12_0_set_psp_funcs(psp); break; default: return -EINVAL; @@ -67,12 +75,32 @@ static int psp_sw_init(void *handle) psp->adev = adev; + return 0; +} + +static int psp_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + int ret; + ret = psp_init_microcode(psp); if (ret) { DRM_ERROR("Failed to load psp firmware!\n"); return ret; } + ret = psp_mem_training_init(psp); + if (ret) { + DRM_ERROR("Failed to initialize memory training!\n"); + return ret; + } + ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); + if (ret) { + DRM_ERROR("Failed to process memory training!\n"); + return ret; + } + return 0; } @@ -80,6 +108,7 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + psp_mem_training_fini(&adev->psp); release_firmware(adev->psp.sos_fw); adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); @@ -120,21 +149,36 @@ psp_cmd_submit_buf(struct psp_context *psp, { int ret; int index; + int timeout = 2000; + + mutex_lock(&psp->mutex); memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); index = atomic_inc_return(&psp->fence_value); - ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, - fence_mc_addr, index); + ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index); if (ret) { atomic_dec(&psp->fence_value); + mutex_unlock(&psp->mutex); return ret; } - while (*((unsigned int *)psp->fence_buf) != index) + 
amdgpu_asic_invalidate_hdp(psp->adev, NULL); + while (*((unsigned int *)psp->fence_buf) != index) { + if (--timeout == 0) + break; + /* + * Shouldn't wait for timeout when err_event_athub occurs, + * because gpu reset thread triggered and lock resource should + * be released for psp resume sequence. + */ + if (amdgpu_ras_intr_triggered()) + break; msleep(1); + amdgpu_asic_invalidate_hdp(psp->adev, NULL); + } /* In some cases, psp response status is not 0 even there is no * problem while the command is submitted. Some version of PSP FW @@ -143,12 +187,17 @@ psp_cmd_submit_buf(struct psp_context *psp, * during psp initialization to avoid breaking hw_init and it doesn't * return -EINVAL. */ - if (psp->cmd_buf_mem->resp.status) { + if (psp->cmd_buf_mem->resp.status || !timeout) { if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_WARN("psp command failed and response status is (%d)\n", - psp->cmd_buf_mem->resp.status); + DRM_DEBUG_DRIVER("psp command (0x%X) failed and response status is (0x%X)\n", + psp->cmd_buf_mem->cmd_id, + psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); + if (!timeout) { + mutex_unlock(&psp->mutex); + return -EINVAL; + } } /* get xGMI session id from response buffer */ @@ -158,6 +207,7 @@ psp_cmd_submit_buf(struct psp_context *psp, ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; } + mutex_unlock(&psp->mutex); return ret; } @@ -175,21 +225,73 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp, cmd->cmd.cmd_setup_tmr.buf_size = size; } +static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t pri_buf_mc, uint32_t size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TOC; + cmd->cmd.cmd_load_toc.toc_phy_addr_lo = lower_32_bits(pri_buf_mc); + cmd->cmd.cmd_load_toc.toc_phy_addr_hi = upper_32_bits(pri_buf_mc); + cmd->cmd.cmd_load_toc.toc_size = size; +} + +/* Issue LOAD TOC cmd to PSP to parse toc and calculate tmr size needed */ +static int psp_load_toc(struct psp_context *psp, + uint32_t *tmr_size) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + /* Copy toc to psp firmware private buffer */ + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->toc_start_addr, psp->toc_bin_size); + + psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + if (!ret) + *tmr_size = psp->cmd_buf_mem->resp.tmr_size; + kfree(cmd); + return ret; +} + /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { int ret; + int tmr_size; + void *tmr_buf; + void **pptr; /* - * Allocate 3M memory aligned to 1M from Frame Buffer (local - * physical). + * According to HW engineer, they prefer the TMR address be "naturally + * aligned", e.g. the start address be an integer multiple of TMR size. * * Note: this memory need be reserved till the driver * uninitializes. */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000, + tmr_size = PSP_TMR_SIZE; + + /* For ASICs that support RLC autoload, psp will parse the toc + * and calculate the total size of TMR needed */ + if (!amdgpu_sriov_vf(psp->adev) && + psp->toc_start_addr && + psp->toc_bin_size && + psp->fw_pri_buf) { + ret = psp_load_toc(psp, &tmr_size); + if (ret) { + DRM_ERROR("Failed to load toc\n"); + return ret; + } + } + + pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); return ret; } @@ -203,21 +305,16 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); - DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n", - PSP_TMR_SIZE, psp->tmr_mc_addr); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, + amdgpu_bo_size(psp->tmr_bo)); + DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", + amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - if (ret) - goto failed; kfree(cmd); - return 0; - -failed: - kfree(cmd); return ret; } @@ -282,6 +379,34 @@ static int psp_asd_load(struct psp_context *psp) return ret; } +static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t id, uint32_t value) +{ + cmd->cmd_id = GFX_CMD_ID_PROG_REG; + cmd->cmd.cmd_setup_reg_prog.reg_value = value; + cmd->cmd.cmd_setup_reg_prog.reg_id = id; +} + +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value) +{ + struct psp_gfx_cmd_resp *cmd = NULL; + int ret = 0; + + if (reg >= PSP_REG_LAST) + return -EINVAL; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_reg_prog_cmd_buf(cmd, reg, value); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + return ret; +} + static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, uint32_t xgmi_ta_size, uint32_t shared_size) @@ -442,7 +567,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw) + if (!psp->adev->psp.ta_fw || + !psp->adev->psp.ta_xgmi_ucode_size || + !psp->adev->psp.ta_xgmi_start_addr) return -ENOENT; if (!psp->xgmi_context.initialized) { @@ -466,32 +593,635 @@ static int psp_xgmi_initialize(struct psp_context *psp) return ret; } +// ras begin +static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t ras_ta_mc, uint64_t ras_mc_shared, + uint32_t ras_ta_size, uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc); + cmd->cmd.cmd_load_ta.app_len = ras_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_ras_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for ras ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAS_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->ras.ras_shared_bo, + &psp->ras.ras_shared_mc_addr, + &psp->ras.ras_shared_buf); + + return ret; +} + +static int psp_ras_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, 
psp->ta_ras_ucode_size); + + psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->ras.ras_shared_mc_addr, + psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + if (!ret) { + psp->ras.ras_initialized = 1; + psp->ras.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ras_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = ras_session_id; +} + +static int psp_ras_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t ras_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->ras.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +int psp_ras_enable_features(struct psp_context *psp, + union ta_ras_cmd_input *info, bool enable) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + if (enable) + ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; + else + ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; + + ras_cmd->ras_in_message = *info; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + return ras_cmd->ras_status; +} + +static int psp_ras_terminate(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->ras.ras_initialized) + return 0; + + ret = psp_ras_unload(psp); + if (ret) + return ret; + + psp->ras.ras_initialized = 0; + + /* free ras shared memory */ + amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, + &psp->ras.ras_shared_mc_addr, + &psp->ras.ras_shared_buf); + + return 0; +} + +static int psp_ras_initialize(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_ras_ucode_size || + !psp->adev->psp.ta_ras_start_addr) { + dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + return 0; + } + + if (!psp->ras.ras_initialized) { + ret = psp_ras_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_ras_load(psp); + if (ret) + return ret; + + return 0; +} +// ras end + +// HDCP start +static void psp_prep_hdcp_ta_load_cmd_buf(struct 
psp_gfx_cmd_resp *cmd, + uint64_t hdcp_ta_mc, + uint64_t hdcp_mc_shared, + uint32_t hdcp_ta_size, + uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(hdcp_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(hdcp_ta_mc); + cmd->cmd.cmd_load_ta.app_len = hdcp_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = + lower_32_bits(hdcp_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = + upper_32_bits(hdcp_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_hdcp_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for hdcp ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->hdcp_context.hdcp_shared_bo, + &psp->hdcp_context.hdcp_shared_mc_addr, + &psp->hdcp_context.hdcp_shared_buf); + + return ret; +} + +static int psp_hdcp_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr, + psp->ta_hdcp_ucode_size); + + psp_prep_hdcp_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->hdcp_context.hdcp_shared_mc_addr, + psp->ta_hdcp_ucode_size, + PSP_HDCP_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + if (!ret) { + psp->hdcp_context.hdcp_initialized = 1; + psp->hdcp_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} +static int psp_hdcp_initialize(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_hdcp_ucode_size || + !psp->adev->psp.ta_hdcp_start_addr) { + dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + return 0; + } + + if (!psp->hdcp_context.hdcp_initialized) { + ret = psp_hdcp_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_hdcp_load(psp); + if (ret) + return ret; + + return 0; +} +static void psp_prep_hdcp_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t hdcp_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = hdcp_session_id; +} + +static int psp_hdcp_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_hdcp_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_hdcp_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t hdcp_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = hdcp_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + 
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_hdcp_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->hdcp_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_hdcp_terminate(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->hdcp_context.hdcp_initialized) + return 0; + + ret = psp_hdcp_unload(psp); + if (ret) + return ret; + + psp->hdcp_context.hdcp_initialized = 0; + + /* free hdcp shared memory */ + amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, + &psp->hdcp_context.hdcp_shared_mc_addr, + &psp->hdcp_context.hdcp_shared_buf); + + return 0; +} +// HDCP end + +// DTM start +static void psp_prep_dtm_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t dtm_ta_mc, + uint64_t dtm_mc_shared, + uint32_t dtm_ta_size, + uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(dtm_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(dtm_ta_mc); + cmd->cmd.cmd_load_ta.app_len = dtm_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(dtm_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(dtm_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_dtm_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for dtm ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->dtm_context.dtm_shared_bo, + &psp->dtm_context.dtm_shared_mc_addr, + &psp->dtm_context.dtm_shared_buf); + + return ret; +} + +static int psp_dtm_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); + + psp_prep_dtm_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->dtm_context.dtm_shared_mc_addr, + psp->ta_dtm_ucode_size, + PSP_DTM_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + if (!ret) { + psp->dtm_context.dtm_initialized = 1; + psp->dtm_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static int psp_dtm_initialize(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_dtm_ucode_size || + !psp->adev->psp.ta_dtm_start_addr) { + dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + return 0; + } + + if (!psp->dtm_context.dtm_initialized) { + ret = psp_dtm_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_dtm_load(psp); + if (ret) + return ret; + + return 0; +} + +static void psp_prep_dtm_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t dtm_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = dtm_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_dtm_invoke(struct psp_context *psp, uint32_t 
ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_dtm_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->dtm_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_dtm_terminate(struct psp_context *psp) +{ + int ret; + + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->dtm_context.dtm_initialized) + return 0; + + ret = psp_hdcp_unload(psp); + if (ret) + return ret; + + psp->dtm_context.dtm_initialized = 0; + + /* free hdcp shared memory */ + amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, + &psp->dtm_context.dtm_shared_mc_addr, + &psp->dtm_context.dtm_shared_buf); + + return 0; +} +// DTM end + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; int ret; if (!amdgpu_sriov_vf(adev) || !adev->in_gpu_reset) { + if (psp->kdb_bin_size && + (psp->funcs->bootloader_load_kdb != NULL)) { + ret = psp_bootloader_load_kdb(psp); + if (ret) { + DRM_ERROR("PSP load kdb failed!\n"); + return ret; + } + } + ret = psp_bootloader_load_sysdrv(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load sysdrv failed!\n"); return ret; + } ret = psp_bootloader_load_sos(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load sos failed!\n"); return ret; + } } ret = psp_ring_create(psp, PSP_RING_TYPE__KM); - if (ret) + if (ret) { + DRM_ERROR("PSP create ring failed!\n"); + return ret; + } + + ret = psp_tmr_init(psp); + if (ret) { + DRM_ERROR("PSP tmr init failed!\n"); return ret; + } ret = psp_tmr_load(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load tmr failed!\n"); + return ret; + } + + ret = psp_asd_init(psp); + if (ret) { + DRM_ERROR("PSP asd init failed!\n"); return ret; + } ret = psp_asd_load(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load asd failed!\n"); return ret; + } if (adev->gmc.xgmi.num_physical_nodes > 1) { ret = psp_xgmi_initialize(psp); @@ -502,6 +1232,24 @@ static int psp_hw_start(struct psp_context *psp) dev_err(psp->adev->dev, "XGMI: Failed to initialize XGMI session\n"); } + + if (psp->adev->psp.ta_fw) { + ret = psp_ras_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "RAS: Failed to initialize RAS\n"); + + ret = psp_hdcp_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "HDCP: Failed to initialize HDCP\n"); + + ret = psp_dtm_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "DTM: Failed to initialize DTM\n"); + } + return 0; } @@ -515,6 +1263,24 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_SDMA1: *type = GFX_FW_TYPE_SDMA1; break; + case AMDGPU_UCODE_ID_SDMA2: + *type = GFX_FW_TYPE_SDMA2; + break; + case AMDGPU_UCODE_ID_SDMA3: + *type = GFX_FW_TYPE_SDMA3; + break; + case AMDGPU_UCODE_ID_SDMA4: + *type = GFX_FW_TYPE_SDMA4; + break; + case AMDGPU_UCODE_ID_SDMA5: + *type = GFX_FW_TYPE_SDMA5; + break; + case AMDGPU_UCODE_ID_SDMA6: + *type = GFX_FW_TYPE_SDMA6; + break; + case AMDGPU_UCODE_ID_SDMA7: + *type = GFX_FW_TYPE_SDMA7; + break; case AMDGPU_UCODE_ID_CP_CE: *type = GFX_FW_TYPE_CP_CE; break; @@ -569,6 +1335,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_DMCU_INTV: *type = GFX_FW_TYPE_DMCU_ISR; break; + case AMDGPU_UCODE_ID_VCN0_RAM: + *type = GFX_FW_TYPE_VCN0_RAM; + break; + case 
AMDGPU_UCODE_ID_VCN1_RAM: + *type = GFX_FW_TYPE_VCN1_RAM; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -577,6 +1349,54 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, return 0; } +static void psp_print_fw_hdr(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) +{ + struct amdgpu_device *adev = psp->adev; + struct common_firmware_header *hdr; + + switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + case AMDGPU_UCODE_ID_SDMA1: + case AMDGPU_UCODE_ID_SDMA2: + case AMDGPU_UCODE_ID_SDMA3: + case AMDGPU_UCODE_ID_SDMA4: + case AMDGPU_UCODE_ID_SDMA5: + case AMDGPU_UCODE_ID_SDMA6: + case AMDGPU_UCODE_ID_SDMA7: + hdr = (struct common_firmware_header *) + adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; + amdgpu_ucode_print_sdma_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_CE: + hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_PFP: + hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_ME: + hdr = (struct common_firmware_header *)adev->gfx.me_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_CP_MEC1: + hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); + break; + case AMDGPU_UCODE_ID_RLC_G: + hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(hdr); + break; + case AMDGPU_UCODE_ID_SMC: + hdr = (struct common_firmware_header *)adev->pm.fw->data; + amdgpu_ucode_print_smc_hdr(hdr); + break; + default: + break; + } +} + static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { @@ -597,36 +1417,84 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } +static int psp_execute_np_fw_load(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) +{ + int ret = 0; + + ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); + if (ret) + return ret; + + ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, + psp->fence_buf_mc_addr); + + return ret; +} + static int psp_np_fw_load(struct psp_context *psp) { int i, ret; struct amdgpu_firmware_info *ucode; struct amdgpu_device* adev = psp->adev; + if (psp->autoload_supported) { + ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; + if (!ucode->fw) + goto out; + + ret = psp_execute_np_fw_load(psp, ucode); + if (ret) + return ret; + } + +out: for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (!ucode->fw) continue; if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && - psp_smu_reload_quirk(psp)) + (psp_smu_reload_quirk(psp) || psp->autoload_supported)) continue; + if (amdgpu_sriov_vf(adev) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) /*skip ucode loading in SRIOV VF */ continue; - ret = 
psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); - if (ret) - return ret; + if (psp->autoload_supported && + (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) + /* skip mec JT when autoload is enabled */ + continue; + + psp_print_fw_hdr(psp, ucode); - ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, - psp->fence_buf_mc_addr); + ret = psp_execute_np_fw_load(psp, ucode); if (ret) return ret; + /* Start rlc autoload after psp received all the gfx firmware */ + if (psp->autoload_supported && ucode->ucode_id == + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + ret = psp_rlc_autoload(psp); + if (ret) { + DRM_ERROR("Failed to start rlc autoload\n"); + return ret; + } + } #if 0 /* check if firmware loaded successfully */ if (!amdgpu_psp_check_fw_loading_status(adev, i)) @@ -651,13 +1519,16 @@ static int psp_load_fw(struct amdgpu_device *adev) if (!psp->cmd) return -ENOMEM; - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - if (ret) - goto failed; + /* this fw pri bo is not used under SRIOV */ + if (!amdgpu_sriov_vf(psp->adev)) { + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + goto failed; + } ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, @@ -665,53 +1536,40 @@ static int psp_load_fw(struct amdgpu_device *adev) &psp->fence_buf_mc_addr, &psp->fence_buf); if (ret) - goto failed_mem2; + goto failed; ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); if (ret) - goto failed_mem1; + goto failed; memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); ret = psp_ring_init(psp, PSP_RING_TYPE__KM); - if (ret) - goto failed_mem; - - ret = psp_tmr_init(psp); - if (ret) - goto failed_mem; - - ret = psp_asd_init(psp); - if (ret) - goto failed_mem; + if (ret) { + DRM_ERROR("PSP ring init failed!\n"); + goto failed; + } skip_memalloc: ret = psp_hw_start(psp); if (ret) - goto failed_mem; + goto failed; ret = psp_np_fw_load(psp); if (ret) - goto failed_mem; + goto failed; return 0; -failed_mem: - amdgpu_bo_free_kernel(&psp->cmd_buf_bo, - &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); -failed_mem1: - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); -failed_mem2: - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); failed: - kfree(psp->cmd); - psp->cmd = NULL; + /* + * all cleanup jobs (xgmi terminate, ras terminate, + * ring destroy, cmd/fence/fw buffers destroy, + * psp->cmd destroy) are delayed to psp_hw_fini + */ return ret; } @@ -748,14 +1606,23 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + void *tmr_buf; + void **pptr; if (adev->gmc.xgmi.num_physical_nodes > 1 && psp->xgmi_context.initialized == 1) psp_xgmi_terminate(psp); + if (psp->adev->psp.ta_fw) { + psp_ras_terminate(psp); + psp_dtm_terminate(psp); + psp_hdcp_terminate(psp); + } + psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -786,6 +1653,24 @@ static int psp_suspend(void *handle) } } + if (psp->adev->psp.ta_fw) { + ret = psp_ras_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate ras ta\n"); + return ret; + } + ret = psp_hdcp_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate hdcp ta\n"); + return ret; + } + ret = psp_dtm_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate dtm ta\n"); + return ret; + } + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); @@ -803,6 +1688,12 @@ static int psp_resume(void *handle) DRM_INFO("PSP is resuming...\n"); + ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); + if (ret) { + DRM_ERROR("Failed to process memory training!\n"); + return ret; + } + mutex_lock(&adev->firmware.mutex); ret = psp_hw_start(psp); @@ -825,10 +1716,46 @@ failed: int psp_gpu_reset(struct amdgpu_device *adev) { + int ret; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; - return psp_mode1_reset(&adev->psp); + mutex_lock(&adev->psp.mutex); + ret = psp_mode1_reset(&adev->psp); + mutex_unlock(&adev->psp.mutex); + + return ret; +} + +int psp_rlc_autoload_start(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + kfree(cmd); + return ret; +} + +int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, + uint64_t cmd_gpu_addr, int cmd_size) +{ + struct amdgpu_firmware_info ucode = {0}; + + ucode.ucode_id = inst_idx ? 
AMDGPU_UCODE_ID_VCN1_RAM : + AMDGPU_UCODE_ID_VCN0_RAM; + ucode.mc_addr = cmd_gpu_addr; + ucode.ucode_size = cmd_size; + + return psp_execute_np_fw_load(&adev->psp, &ucode); } static bool psp_check_fw_loading_status(struct amdgpu_device *adev, @@ -912,3 +1839,12 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block = .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v12_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 12, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 2ef98cc755d6..09c5474ebcc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -28,18 +28,31 @@ #include "amdgpu.h" #include "psp_gfx_if.h" #include "ta_xgmi_if.h" +#include "ta_ras_if.h" #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_XGMI_SHARED_MEM_SIZE 0x4000 +#define PSP_RAS_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE 0x400000 +#define PSP_HDCP_SHARED_MEM_SIZE 0x4000 +#define PSP_DTM_SHARED_MEM_SIZE 0x4000 +#define PSP_SHARED_MEM_SIZE 0x4000 struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; +enum psp_bootloader_cmd { + PSP_BL__LOAD_SYSDRV = 0x10000, + PSP_BL__LOAD_SOSDRV = 0x20000, + PSP_BL__LOAD_KEY_DATABASE = 0x80000, + PSP_BL__DRAM_LONG_TRAIN = 0x100000, + PSP_BL__DRAM_SHORT_TRAIN = 0x200000, +}; + enum psp_ring_type { PSP_RING_TYPE__INVALID = 0, @@ -60,9 +73,18 @@ struct psp_ring uint32_t ring_size; }; +/* More registers may will be supported */ +enum psp_reg_prog_id { + PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ + PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ + PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_LAST +}; + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); + int (*bootloader_load_kdb)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -73,7 +95,6 @@ struct psp_funcs int (*ring_destroy)(struct psp_context *psp, enum psp_ring_type ring_type); int (*cmd_submit)(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); bool (*compare_sram_data)(struct psp_context *psp, @@ -88,6 +109,26 @@ struct psp_funcs int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology); bool (*support_vmr_ring)(struct psp_context *psp); + int (*ras_trigger_error)(struct psp_context *psp, + struct ta_ras_trigger_error_input *info); + int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); + int (*rlc_autoload_start)(struct psp_context *psp); + int (*mem_training_init)(struct psp_context *psp); + void (*mem_training_fini)(struct psp_context *psp); + int (*mem_training)(struct psp_context *psp, uint32_t ops); +}; + +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 +struct psp_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct psp_xgmi_topology_info { + uint32_t num_nodes; + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; }; struct psp_xgmi_context { @@ -96,6 +137,76 @@ struct psp_xgmi_context { struct amdgpu_bo *xgmi_shared_bo; uint64_t 
xgmi_shared_mc_addr; void *xgmi_shared_buf; + struct psp_xgmi_topology_info top_info; +}; + +struct psp_ras_context { + /*ras fw*/ + bool ras_initialized; + uint32_t session_id; + struct amdgpu_bo *ras_shared_bo; + uint64_t ras_shared_mc_addr; + void *ras_shared_buf; + struct amdgpu_ras *ras; +}; + +struct psp_hdcp_context { + bool hdcp_initialized; + uint32_t session_id; + struct amdgpu_bo *hdcp_shared_bo; + uint64_t hdcp_shared_mc_addr; + void *hdcp_shared_buf; +}; + +struct psp_dtm_context { + bool dtm_initialized; + uint32_t session_id; + struct amdgpu_bo *dtm_shared_bo; + uint64_t dtm_shared_mc_addr; + void *dtm_shared_buf; +}; + +#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 +#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 +#define GDDR6_MEM_TRAINING_OFFSET 0x8000 + +enum psp_memory_training_init_flag { + PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, + PSP_MEM_TRAIN_SUPPORT = 0x1, + PSP_MEM_TRAIN_INIT_FAILED = 0x2, + PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4, + PSP_MEM_TRAIN_INIT_SUCCESS = 0x8, +}; + +enum psp_memory_training_ops { + PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1, + PSP_MEM_TRAIN_SAVE = 0x2, + PSP_MEM_TRAIN_RESTORE = 0x4, + PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8, + PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG, + PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG, +}; + +struct psp_memory_training_context { + /*training data size*/ + u64 train_data_size; + /* + * sys_cache + * cpu virtual address + * system memory buffer that used to store the training data. + */ + void *sys_cache; + + /*vram offset of the p2c training data*/ + u64 p2c_train_data_offset; + struct amdgpu_bo *p2c_bo; + + /*vram offset of the c2p training data*/ + u64 c2p_train_data_offset; + struct amdgpu_bo *c2p_bo; + + enum psp_memory_training_init_flag init; + u32 training_cnt; }; struct psp_context @@ -117,13 +228,16 @@ struct psp_context uint32_t sos_feature_version; uint32_t sys_bin_size; uint32_t sos_bin_size; + uint32_t toc_bin_size; + uint32_t kdb_bin_size; uint8_t *sys_start_addr; uint8_t *sos_start_addr; + uint8_t *toc_start_addr; + uint8_t *kdb_start_addr; /* tmr buffer */ struct amdgpu_bo *tmr_bo; uint64_t tmr_mc_addr; - void *tmr_buf; /* asd firmware and buffer */ const struct firmware *asd_fw; @@ -147,13 +261,33 @@ struct psp_context /* fence value associated with cmd buffer */ atomic_t fence_value; + /* flag to mark whether gfx fw autoload is supported or not */ + bool autoload_supported; /* xgmi ta firmware and buffer */ const struct firmware *ta_fw; + uint32_t ta_fw_version; uint32_t ta_xgmi_ucode_version; uint32_t ta_xgmi_ucode_size; uint8_t *ta_xgmi_start_addr; + uint32_t ta_ras_ucode_version; + uint32_t ta_ras_ucode_size; + uint8_t *ta_ras_start_addr; + + uint32_t ta_hdcp_ucode_version; + uint32_t ta_hdcp_ucode_size; + uint8_t *ta_hdcp_start_addr; + + uint32_t ta_dtm_ucode_version; + uint32_t ta_dtm_ucode_size; + uint8_t *ta_dtm_start_addr; + struct psp_xgmi_context xgmi_context; + struct psp_ras_context ras; + struct psp_hdcp_context hdcp_context; + struct psp_dtm_context dtm_context; + struct mutex mutex; + struct psp_memory_training_context mem_train_ctx; }; struct amdgpu_psp_funcs { @@ -161,29 +295,19 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; -#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 -struct psp_xgmi_node_info { - uint64_t node_id; - uint8_t num_hops; - uint8_t is_sharing_enabled; - enum ta_xgmi_assigned_sdma_engine sdma_engine; -}; - -struct psp_xgmi_topology_info { - uint32_t num_nodes; - struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; -}; #define 
psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) -#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ - (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) +#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \ + (psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0) +#define psp_bootloader_load_kdb(psp) \ + ((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ @@ -204,9 +328,24 @@ struct psp_xgmi_topology_info { #define psp_xgmi_set_topology_info(psp, num_device, topology) \ ((psp)->funcs->xgmi_set_topology_info ? \ (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL) +#define psp_rlc_autoload(psp) \ + ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0) +#define psp_mem_training_init(psp) \ + ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0) +#define psp_mem_training_fini(psp) \ + ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0) +#define psp_mem_training(psp, ops) \ + ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) +#define psp_ras_trigger_error(psp, info) \ + ((psp)->funcs->ras_trigger_error ? \ + (psp)->funcs->ras_trigger_error((psp), (info)) : -EINVAL) +#define psp_ras_cure_posion(psp, addr) \ + ((psp)->funcs->ras_cure_posion ? 
\ + (psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -214,9 +353,23 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); +int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, + uint64_t cmd_gpu_addr, int cmd_size); + int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); -extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; +int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +int psp_ras_enable_features(struct psp_context *psp, + union ta_ras_cmd_input *info, bool enable); +int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); + +int psp_rlc_autoload_start(struct psp_context *psp); + +extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c new file mode 100644 index 000000000000..404483437bd3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -0,0 +1,1940 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * + */ +#include <linux/debugfs.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/reboot.h> +#include <linux/syscalls.h> + +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include "amdgpu_atomfirmware.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" + +const char *ras_error_string[] = { + "none", + "parity", + "single_correctable", + "multi_uncorrectable", + "poison", +}; + +const char *ras_block_string[] = { + "umc", + "sdma", + "gfx", + "mmhub", + "athub", + "pcie_bif", + "hdp", + "xgmi_wafl", + "df", + "smn", + "sem", + "mp0", + "mp1", + "fuse", +}; + +#define ras_err_str(i) (ras_error_string[ffs(i)]) +#define ras_block_str(i) (ras_block_string[i]) + +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 +#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) + +/* inject address is 52 bits */ +#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) + +enum amdgpu_ras_retire_page_reservation { + AMDGPU_RAS_RETIRE_PAGE_RESERVED, + AMDGPU_RAS_RETIRE_PAGE_PENDING, + AMDGPU_RAS_RETIRE_PAGE_FAULT, +}; + +atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); + +static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, + uint64_t addr); + +static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private; + struct ras_query_if info = { + .head = obj->head, + }; + ssize_t s; + char val[128]; + + if (amdgpu_ras_error_query(obj->adev, &info)) + return -EINVAL; + + s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n", + "ue", info.ue_count, + "ce", info.ce_count); + if (*pos >= s) + return 0; + + s -= *pos; + s = min_t(u64, s, size); + + + if (copy_to_user(buf, &val[*pos], s)) + return -EINVAL; + + *pos += s; + + return s; +} + +static const struct file_operations amdgpu_ras_debugfs_ops = { + .owner = THIS_MODULE, + .read = amdgpu_ras_debugfs_read, + .write = NULL, + .llseek = default_llseek +}; + +static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) { + *block_id = i; + if (strcmp(name, ras_block_str(i)) == 0) + return 0; + } + return -EINVAL; +} + +static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, + const char __user *buf, size_t size, + loff_t *pos, struct ras_debug_if *data) +{ + ssize_t s = min_t(u64, 64, size); + char str[65]; + char block_name[33]; + char err[9] = "ue"; + int op = -1; + int block_id; + uint32_t sub_block; + u64 address, value; + + if (*pos) + return -EINVAL; + *pos = size; + + memset(str, 0, sizeof(str)); + memset(data, 0, sizeof(*data)); + + if (copy_from_user(str, buf, s)) + return -EINVAL; + + if (sscanf(str, "disable %32s", block_name) == 1) + op = 0; + else if (sscanf(str, "enable %32s %8s", block_name, err) == 2) + op = 1; + else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) + op = 2; + else if (str[0] && str[1] && str[2] && str[3]) + /* ascii string, but commands are not matched. 
*/ + return -EINVAL; + + if (op != -1) { + if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) + return -EINVAL; + + data->head.block = block_id; + /* only ue and ce errors are supported */ + if (!memcmp("ue", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + else if (!memcmp("ce", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else + return -EINVAL; + + data->op = op; + + if (op == 2) { + if (sscanf(str, "%*s %*s %*s %u %llu %llu", + &sub_block, &address, &value) != 3) + if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", + &sub_block, &address, &value) != 3) + return -EINVAL; + data->head.sub_block_index = sub_block; + data->inject.address = address; + data->inject.value = value; + } + } else { + if (size < sizeof(*data)) + return -EINVAL; + + if (copy_from_user(data, buf, sizeof(*data))) + return -EINVAL; + } + + return 0; +} + +static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head); + +/** + * DOC: AMDGPU RAS debugfs control interface + * + * It accepts struct ras_debug_if who has two members. + * + * First member: ras_debug_if::head or ras_debug_if::inject. + * + * head is used to indicate which IP block will be under control. + * + * head has four members, they are block, type, sub_block_index, name. + * block: which IP will be under control. + * type: what kind of error will be enabled/disabled/injected. + * sub_block_index: some IPs have subcomponets. say, GFX, sDMA. + * name: the name of IP. + * + * inject has two more members than head, they are address, value. + * As their names indicate, inject operation will write the + * value to the address. + * + * The second member: struct ras_debug_if::op. + * It has three kinds of operations. + * + * - 0: disable RAS on the block. Take ::head as its data. + * - 1: enable RAS on the block. Take ::head as its data. + * - 2: inject errors on the block. Take ::inject as its data. + * + * How to use the interface? + * + * Programs + * + * Copy the struct ras_debug_if in your codes and initialize it. + * Write the struct to the control node. + * + * Shells + * + * .. code-block:: bash + * + * echo op block [error [sub_block address value]] > .../ras/ras_ctrl + * + * Parameters: + * + * op: disable, enable, inject + * disable: only block is needed + * enable: block and error are needed + * inject: error, address, value are needed + * block: umc, sdma, gfx, ......... + * see ras_block_string[] for details + * error: ue, ce + * ue: multi_uncorrectable + * ce: single_correctable + * sub_block: + * sub block index, pass 0 if there is no sub block + * + * here are some examples for bash commands: + * + * .. code-block:: bash + * + * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl + * + * How to check the result? + * + * For disable/enable, please check ras features at + * /sys/class/drm/card[0/1/2...]/device/ras/features + * + * For inject, please check corresponding err count at + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * + * .. note:: + * Operations are only allowed on blocks which are supported. + * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * to see which blocks support RAS on a particular asic. 
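[Editor's illustration, not part of this patch.] The control-interface DOC above describes a "Programs" path (fill a struct ras_debug_if and write it to the control node) but only shows shell examples. Below is a minimal user-space sketch of that binary path, assuming the struct layout from amdgpu_ras.h further down; the debugfs path, the numeric enum values and the error handling are illustrative and may need adjusting for a given kernel.

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

/* user-space mirror of the kernel's ras_debug_if layout (see amdgpu_ras.h);
 * enums are carried as plain int -- matching the kernel layout exactly is
 * an assumption of this sketch */
struct ras_common_if {
	int block;			/* enum amdgpu_ras_block */
	int type;			/* enum amdgpu_ras_error_type */
	uint32_t sub_block_index;
	char name[32];
};

struct ras_inject_if {
	struct ras_common_if head;
	uint64_t address;
	uint64_t value;
};

struct ras_debug_if {
	union {
		struct ras_common_if head;
		struct ras_inject_if inject;
	};
	int op;
};

int main(void)
{
	struct ras_debug_if data;
	int fd;

	memset(&data, 0, sizeof(data));
	data.op = 1;		/* 1: enable RAS on the block */
	data.head.block = 0;	/* AMDGPU_RAS_BLOCK__UMC */
	data.head.type = 4;	/* AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE */

	fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, &data, sizeof(data)) != (ssize_t)sizeof(data)) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}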
+ * + */ +static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct ras_debug_if data; + int ret = 0; + + ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); + if (ret) + return -EINVAL; + + if (!amdgpu_ras_is_supported(adev, data.head.block)) + return -EINVAL; + + switch (data.op) { + case 0: + ret = amdgpu_ras_feature_enable(adev, &data.head, 0); + break; + case 1: + ret = amdgpu_ras_feature_enable(adev, &data.head, 1); + break; + case 2: + if ((data.inject.address >= adev->gmc.mc_vram_size) || + (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + ret = -EINVAL; + break; + } + + /* umc ce/ue error injection for a bad page is not allowed */ + if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && + amdgpu_ras_check_bad_page(adev, data.inject.address)) { + DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n", + data.inject.address); + break; + } + + /* data.inject.address is offset instead of absolute gpu address */ + ret = amdgpu_ras_error_inject(adev, &data.inject); + break; + default: + ret = -EINVAL; + break; + }; + + if (ret) + return -EINVAL; + + return size; +} + +/** + * DOC: AMDGPU RAS debugfs EEPROM table reset interface + * + * Some boards contain an EEPROM which is used to persistently store a list of + * bad pages which experiences ECC errors in vram. This interface provides + * a way to reset the EEPROM, e.g., after testing error injection. + * + * Usage: + * + * .. code-block:: bash + * + * echo 1 > ../ras/ras_eeprom_reset + * + * will reset EEPROM table to 0 entries. + * + */ +static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + int ret; + + ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control); + + return ret == 1 ? size : -EIO; +} + +static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { + .owner = THIS_MODULE, + .read = NULL, + .write = amdgpu_ras_debugfs_ctrl_write, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { + .owner = THIS_MODULE, + .read = NULL, + .write = amdgpu_ras_debugfs_eeprom_write, + .llseek = default_llseek +}; + +/** + * DOC: AMDGPU RAS sysfs Error Count Interface + * + * It allows the user to read the error count for each IP block on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * + * It outputs the multiple lines which report the uncorrected (ue) and corrected + * (ce) error counts. + * + * The format of one line is below, + * + * [ce|ue]: count + * + * Example: + * + * .. 
code-block:: bash + * + * ue: 0 + * ce: 1 + * + */ +static ssize_t amdgpu_ras_sysfs_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr); + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_error_query(obj->adev, &info)) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s: %lu\n%s: %lu\n", + "ue", info.ue_count, + "ce", info.ce_count); +} + +/* obj begin */ + +#define get_obj(obj) do { (obj)->use++; } while (0) +#define alive_obj(obj) ((obj)->use) + +static inline void put_obj(struct ras_manager *obj) +{ + if (obj && --obj->use == 0) + list_del(&obj->node); + if (obj && obj->use < 0) { + DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name); + } +} + +/* make one obj and return it. */ +static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + + if (!con) + return NULL; + + if (head->block >= AMDGPU_RAS_BLOCK_COUNT) + return NULL; + + obj = &con->objs[head->block]; + /* already exist. return obj? */ + if (alive_obj(obj)) + return NULL; + + obj->head = *head; + obj->adev = adev; + list_add(&obj->node, &con->head); + get_obj(obj); + + return obj; +} + +/* return an obj equal to head, or the first when head is NULL */ +static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + int i; + + if (!con) + return NULL; + + if (head) { + if (head->block >= AMDGPU_RAS_BLOCK_COUNT) + return NULL; + + obj = &con->objs[head->block]; + + if (alive_obj(obj)) { + WARN_ON(head->block != obj->head.block); + return obj; + } + } else { + for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) { + obj = &con->objs[i]; + if (alive_obj(obj)) { + WARN_ON(i != obj->head.block); + return obj; + } + } + } + + return NULL; +} +/* obj end */ + +/* feature ctl begin */ +static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return con->hw_supported & BIT(head->block); +} + +static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return con->features & BIT(head->block); +} + +/* + * if obj is not created, then create one. + * set feature enable flag. + */ +static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, + struct ras_common_if *head, int enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + /* If hardware does not support ras, then do not create obj. + * But if hardware support ras, we can create the obj. + * Ras framework checks con->hw_supported to see if it need do + * corresponding initialization. + * IP checks con->support to see if it need disable ras. 
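[Editor's illustration, not part of this patch.] The per-block <ip>_err_count sysfs node documented earlier in this file emits one "ue:" line and one "ce:" line; a short hedged user-space sketch of parsing it follows. The card index and the block name in the path are placeholders.

#include <stdio.h>

int main(void)
{
	unsigned long ue = 0, ce = 0;
	FILE *f = fopen("/sys/class/drm/card0/device/ras/umc_err_count", "r");

	if (!f)
		return 1;
	/* node format (see DOC above): "ue: <count>\nce: <count>\n" */
	if (fscanf(f, "ue: %lu ce: %lu", &ue, &ce) != 2) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("umc: %lu uncorrected, %lu corrected\n", ue, ce);
	return 0;
}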
+ */ + if (!amdgpu_ras_is_feature_allowed(adev, head)) + return 0; + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + return 0; + + if (enable) { + if (!obj) { + obj = amdgpu_ras_create_obj(adev, head); + if (!obj) + return -EINVAL; + } else { + /* In case we create obj somewhere else */ + get_obj(obj); + } + con->features |= BIT(head->block); + } else { + if (obj && amdgpu_ras_is_feature_enabled(adev, head)) { + con->features &= ~BIT(head->block); + put_obj(obj); + } + } + + return 0; +} + +/* wrapper of psp_ras_enable_features */ +int amdgpu_ras_feature_enable(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + union ta_ras_cmd_input info; + int ret; + + if (!con) + return -EINVAL; + + if (!enable) { + info.disable_features = (struct ta_ras_disable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } else { + info.enable_features = (struct ta_ras_enable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } + + /* Do not enable if it is not allowed. */ + WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); + /* Are we alerady in that state we are going to set? */ + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + return 0; + + if (!amdgpu_ras_intr_triggered()) { + ret = psp_ras_enable_features(&adev->psp, &info, enable); + if (ret) { + DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; + return -EINVAL; + } + } + + /* setup the obj */ + __amdgpu_ras_feature_enable(adev, head, enable); + + return 0; +} + +/* Only used in device probe stage and called only once. */ +int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret; + + if (!con) + return -EINVAL; + + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { + if (enable) { + /* There is no harm to issue a ras TA cmd regardless of + * the currecnt ras state. + * If current state == target state, it will do nothing + * But sometimes it requests driver to reset and repost + * with error code -EAGAIN. + */ + ret = amdgpu_ras_feature_enable(adev, head, 1); + /* With old ras TA, we might fail to enable ras. + * Log it and just setup the object. + * TODO need remove this WA in the future. + */ + if (ret == -EINVAL) { + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (!ret) + DRM_INFO("RAS INFO: %s setup object\n", + ras_block_str(head->block)); + } + } else { + /* setup the object then issue a ras TA disable cmd.*/ + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (ret) + return ret; + + ret = amdgpu_ras_feature_enable(adev, head, 0); + } + } else + ret = amdgpu_ras_feature_enable(adev, head, enable); + + return ret; +} + +static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev, + bool bypass) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + /* bypass psp. 
+ * aka just release the obj and corresponding flags + */ + if (bypass) { + if (__amdgpu_ras_feature_enable(adev, &obj->head, 0)) + break; + } else { + if (amdgpu_ras_feature_enable(adev, &obj->head, 0)) + break; + } + } + + return con->features; +} + +static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, + bool bypass) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; + int i; + const enum amdgpu_ras_error_type default_ras_type = + AMDGPU_RAS_ERROR__NONE; + + for (i = 0; i < ras_block_count; i++) { + struct ras_common_if head = { + .block = i, + .type = default_ras_type, + .sub_block_index = 0, + }; + strcpy(head.name, ras_block_str(i)); + if (bypass) { + /* + * bypass psp. vbios enable ras for us. + * so just create the obj + */ + if (__amdgpu_ras_feature_enable(adev, &head, 1)) + break; + } else { + if (amdgpu_ras_feature_enable(adev, &head, 1)) + break; + } + } + + return con->features; +} +/* feature ctl end */ + +/* query/inject/cure begin */ +int amdgpu_ras_error_query(struct amdgpu_device *adev, + struct ras_query_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_err_data err_data = {0, 0, 0, NULL}; + + if (!obj) + return -EINVAL; + + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__UMC: + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, &err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__MMHUB: + if (adev->mmhub.funcs->query_ras_error_count) + adev->mmhub.funcs->query_ras_error_count(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__PCIE_BIF: + if (adev->nbio.funcs->query_ras_error_count) + adev->nbio.funcs->query_ras_error_count(adev, &err_data); + break; + default: + break; + } + + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + + info->ue_count = obj->err_data.ue_count; + info->ce_count = obj->err_data.ce_count; + + if (err_data.ce_count) { + dev_info(adev->dev, "%ld correctable errors detected in %s block\n", + obj->err_data.ce_count, ras_block_str(info->head.block)); + } + if (err_data.ue_count) { + dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", + obj->err_data.ue_count, ras_block_str(info->head.block)); + } + + return 0; +} + +/* wrapper of psp_ras_trigger_error */ +int amdgpu_ras_error_inject(struct amdgpu_device *adev, + struct ras_inject_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ta_ras_trigger_error_input block_info = { + .block_id = amdgpu_ras_block_to_ta(info->head.block), + .inject_error_type = amdgpu_ras_error_to_ta(info->head.type), + .sub_block_index = info->head.sub_block_index, + .address = info->address, + .value = info->value, + }; + int ret = 0; + + if (!obj) + return -EINVAL; + + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->ras_error_inject) + ret = adev->gfx.funcs->ras_error_inject(adev, info); + else + ret = -EINVAL; + break; + case AMDGPU_RAS_BLOCK__UMC: + case AMDGPU_RAS_BLOCK__MMHUB: + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + case AMDGPU_RAS_BLOCK__PCIE_BIF: + ret = psp_ras_trigger_error(&adev->psp, &block_info); + 
break; + default: + DRM_INFO("%s error injection is not supported yet\n", + ras_block_str(info->head.block)); + ret = -EINVAL; + } + + if (ret) + DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", + ras_block_str(info->head.block), + ret); + + return ret; +} + +int amdgpu_ras_error_cure(struct amdgpu_device *adev, + struct ras_cure_if *info) +{ + /* psp fw has no cure interface for now. */ + return 0; +} + +/* get the total error counts on all IPs */ +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, + bool is_ce) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + struct ras_err_data data = {0, 0}; + + if (!con) + return 0; + + list_for_each_entry(obj, &con->head, node) { + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_error_query(adev, &info)) + return 0; + + data.ce_count += info.ce_count; + data.ue_count += info.ue_count; + } + + return is_ce ? data.ce_count : data.ue_count; +} +/* query/inject/cure end */ + + +/* sysfs begin */ + +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count); + +static char *amdgpu_ras_badpage_flags_str(unsigned int flags) +{ + switch (flags) { + case AMDGPU_RAS_RETIRE_PAGE_RESERVED: + return "R"; + case AMDGPU_RAS_RETIRE_PAGE_PENDING: + return "P"; + case AMDGPU_RAS_RETIRE_PAGE_FAULT: + default: + return "F"; + }; +} + +/** + * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface + * + * It allows user to read the bad pages of vram on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages + * + * It outputs multiple lines, and each line stands for one gpu page. + * + * The format of one line is below, + * gpu pfn : gpu page size : flags + * + * gpu pfn and gpu page size are printed in hex format. + * flags can be one of below character, + * + * R: reserved, this gpu page is reserved and not able to use. + * + * P: pending for reserve, this gpu page is marked as bad, will be reserved + * in next window of page_reserve. + * + * F: unable to reserve. this gpu page can't be reserved due to some reasons. + * + * Examples: + * + * .. 
code-block:: bash + * + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + * + */ + +static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, + struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t ppos, size_t count) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, badpages_attr); + struct amdgpu_device *adev = con->adev; + const unsigned int element_size = + sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; + unsigned int start = div64_ul(ppos + element_size - 1, element_size); + unsigned int end = div64_ul(ppos + count - 1, element_size); + ssize_t s = 0; + struct ras_badpage *bps = NULL; + unsigned int bps_count = 0; + + memset(buf, 0, count); + + if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) + return 0; + + for (; start < end && start < bps_count; start++) + s += scnprintf(&buf[s], element_size + 1, + "0x%08x : 0x%08x : %1s\n", + bps[start].bp, + bps[start].size, + amdgpu_ras_badpage_flags_str(bps[start].flags)); + + kfree(bps); + + return s; +} + +static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, features_attr); + + return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); +} + +static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct attribute *attrs[] = { + &con->features_attr.attr, + NULL + }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; + struct attribute_group group = { + .name = "ras", + .attrs = attrs, + .bin_attrs = bin_attrs, + }; + + con->features_attr = (struct device_attribute) { + .attr = { + .name = "features", + .mode = S_IRUGO, + }, + .show = amdgpu_ras_sysfs_features_read, + }; + + con->badpages_attr = (struct bin_attribute) { + .attr = { + .name = "gpu_vram_bad_pages", + .mode = S_IRUGO, + }, + .size = 0, + .private = NULL, + .read = amdgpu_ras_sysfs_badpages_read, + }; + + sysfs_attr_init(attrs[0]); + sysfs_bin_attr_init(bin_attrs[0]); + + return sysfs_create_group(&adev->dev->kobj, &group); +} + +static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct attribute *attrs[] = { + &con->features_attr.attr, + NULL + }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; + struct attribute_group group = { + .name = "ras", + .attrs = attrs, + .bin_attrs = bin_attrs, + }; + + sysfs_remove_group(&adev->dev->kobj, &group); + + return 0; +} + +int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); + + if (!obj || obj->attr_inuse) + return -EINVAL; + + get_obj(obj); + + memcpy(obj->fs_data.sysfs_name, + head->sysfs_name, + sizeof(obj->fs_data.sysfs_name)); + + obj->sysfs_attr = (struct device_attribute){ + .attr = { + .name = obj->fs_data.sysfs_name, + .mode = S_IRUGO, + }, + .show = amdgpu_ras_sysfs_read, + }; + sysfs_attr_init(&obj->sysfs_attr.attr); + + if (sysfs_add_file_to_group(&adev->dev->kobj, + &obj->sysfs_attr.attr, + "ras")) { + put_obj(obj); + return -EINVAL; + } + + obj->attr_inuse = 1; + + return 0; +} + +int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + if (!obj || !obj->attr_inuse) + return -EINVAL; + + 
sysfs_remove_file_from_group(&adev->dev->kobj, + &obj->sysfs_attr.attr, + "ras"); + obj->attr_inuse = 0; + put_obj(obj); + + return 0; +} + +static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + amdgpu_ras_sysfs_remove(adev, &obj->head); + } + + amdgpu_ras_sysfs_remove_feature_node(adev); + + return 0; +} +/* sysfs end */ + +/** + * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors + * + * Normally when there is an uncorrectable error, the driver will reset + * the GPU to recover. However, in the event of an unrecoverable error, + * the driver provides an interface to reboot the system automatically + * in that event. + * + * The following file in debugfs provides that interface: + * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot + * + * Usage: + * + * .. code-block:: bash + * + * echo true > .../ras/auto_reboot + * + */ +/* debugfs begin */ +static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct drm_minor *minor = adev->ddev->primary; + + con->dir = debugfs_create_dir("ras", minor->debugfs_root); + debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); + debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_eeprom_ops); + + /* + * After one uncorrectable error happens, usually GPU recovery will + * be scheduled. But due to the known problem in GPU recovery failing + * to bring GPU back, below interface provides one direct way to + * user to reboot system automatically in such case within + * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine + * will never be called. 
+ */ + debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir, + &con->reboot); +} + +void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); + + if (!obj || obj->ent) + return; + + get_obj(obj); + + memcpy(obj->fs_data.debugfs_name, + head->debugfs_name, + sizeof(obj->fs_data.debugfs_name)); + + obj->ent = debugfs_create_file(obj->fs_data.debugfs_name, + S_IWUGO | S_IRUGO, con->dir, obj, + &amdgpu_ras_debugfs_ops); +} + +void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + if (!obj || !obj->ent) + return; + + debugfs_remove(obj->ent); + obj->ent = NULL; + put_obj(obj); +} + +static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + amdgpu_ras_debugfs_remove(adev, &obj->head); + } + + debugfs_remove_recursive(con->dir); + con->dir = NULL; +} +/* debugfs end */ + +/* ras fs */ + +static int amdgpu_ras_fs_init(struct amdgpu_device *adev) +{ + amdgpu_ras_sysfs_create_feature_node(adev); + amdgpu_ras_debugfs_create_ctrl_node(adev); + + return 0; +} + +static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) +{ + amdgpu_ras_debugfs_remove_all(adev); + amdgpu_ras_sysfs_remove_all(adev); + return 0; +} +/* ras fs end */ + +/* ih begin */ +static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) +{ + struct ras_ih_data *data = &obj->ih_data; + struct amdgpu_iv_entry entry; + int ret; + struct ras_err_data err_data = {0, 0, 0, NULL}; + + while (data->rptr != data->wptr) { + rmb(); + memcpy(&entry, &data->ring[data->rptr], + data->element_size); + + wmb(); + data->rptr = (data->aligned_element_size + + data->rptr) % data->ring_size; + + /* Let IP handle its data, maybe we need get the output + * from the callback to udpate the error type/count, etc + */ + if (data->cb) { + ret = data->cb(obj->adev, &err_data, &entry); + /* ue will trigger an interrupt, and in that case + * we need do a reset to recovery the whole system. + * But leave IP do that recovery, here we just dispatch + * the error. + */ + if (ret == AMDGPU_RAS_SUCCESS) { + /* these counts could be left as 0 if + * some blocks do not count error number + */ + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + } + } + } +} + +static void amdgpu_ras_interrupt_process_handler(struct work_struct *work) +{ + struct ras_ih_data *data = + container_of(work, struct ras_ih_data, ih_work); + struct ras_manager *obj = + container_of(data, struct ras_manager, ih_data); + + amdgpu_ras_interrupt_handler(obj); +} + +int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, + struct ras_dispatch_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_ih_data *data = &obj->ih_data; + + if (!obj) + return -EINVAL; + + if (data->inuse == 0) + return 0; + + /* Might be overflow... 
*/ + memcpy(&data->ring[data->wptr], info->entry, + data->element_size); + + wmb(); + data->wptr = (data->aligned_element_size + + data->wptr) % data->ring_size; + + schedule_work(&data->ih_work); + + return 0; +} + +int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, + struct ras_ih_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_ih_data *data; + + if (!obj) + return -EINVAL; + + data = &obj->ih_data; + if (data->inuse == 0) + return 0; + + cancel_work_sync(&data->ih_work); + + kfree(data->ring); + memset(data, 0, sizeof(*data)); + put_obj(obj); + + return 0; +} + +int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, + struct ras_ih_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_ih_data *data; + + if (!obj) { + /* in case we registe the IH before enable ras feature */ + obj = amdgpu_ras_create_obj(adev, &info->head); + if (!obj) + return -EINVAL; + } else + get_obj(obj); + + data = &obj->ih_data; + /* add the callback.etc */ + *data = (struct ras_ih_data) { + .inuse = 0, + .cb = info->cb, + .element_size = sizeof(struct amdgpu_iv_entry), + .rptr = 0, + .wptr = 0, + }; + + INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler); + + data->aligned_element_size = ALIGN(data->element_size, 8); + /* the ring can store 64 iv entries. */ + data->ring_size = 64 * data->aligned_element_size; + data->ring = kmalloc(data->ring_size, GFP_KERNEL); + if (!data->ring) { + put_obj(obj); + return -ENOMEM; + } + + /* IH is ready */ + data->inuse = 1; + + return 0; +} + +static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + struct ras_ih_if info = { + .head = obj->head, + }; + amdgpu_ras_interrupt_remove_handler(adev, &info); + } + + return 0; +} +/* ih end */ + +/* recovery begin */ + +/* return 0 on success. + * caller need free bps. 
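[Editor's illustration, not part of this patch.] The interrupt helpers above (amdgpu_ras_interrupt_add_handler() and amdgpu_ras_interrupt_dispatch()) are meant to be driven from an IP block's IRQ path; the sketch below shows one plausible wiring. All my_umc_* names are hypothetical; only the amdgpu_ras_* calls and structures come from this file.

/* bottom-half callback invoked from the ih_work worker */
static int my_umc_process_ras_data_cb(struct amdgpu_device *adev,
				      void *err_data,
				      struct amdgpu_iv_entry *entry)
{
	/* let the IP decode the entry and update the ue/ce counts */
	return AMDGPU_RAS_SUCCESS;
}

static struct ras_ih_if my_umc_ih_info = {
	.cb = my_umc_process_ras_data_cb,
	.head = {
		.block = AMDGPU_RAS_BLOCK__UMC,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,
		.name = "umc",
	},
};

/* init time: allocates the 64-entry IV ring and arms the bottom half */
static int my_umc_ras_ih_init(struct amdgpu_device *adev)
{
	return amdgpu_ras_interrupt_add_handler(adev, &my_umc_ih_info);
}

/* top-half interrupt path: copy the IV entry into the ring and let the
 * worker run the callback later */
static int my_umc_irq(struct amdgpu_device *adev,
		      struct amdgpu_iv_entry *entry)
{
	struct ras_dispatch_if info = {
		.head = my_umc_ih_info.head,
		.entry = entry,
	};

	return amdgpu_ras_interrupt_dispatch(adev, &info);
}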
+ */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i = 0; + int ret = 0; + + if (!con || !con->eh_data || !bps || !count) + return -EINVAL; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data || data->count == 0) { + *bps = NULL; + goto out; + } + + *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + if (!*bps) { + ret = -ENOMEM; + goto out; + } + + for (; i < data->count; i++) { + (*bps)[i] = (struct ras_badpage){ + .bp = data->bps[i].retired_page, + .size = AMDGPU_GPU_PAGE_SIZE, + .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, + }; + + if (data->last_reserved <= i) + (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; + else if (data->bps_bo[i] == NULL) + (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; + } + + *count = data->count; +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + +static void amdgpu_ras_do_recovery(struct work_struct *work) +{ + struct amdgpu_ras *ras = + container_of(work, struct amdgpu_ras, recovery_work); + + amdgpu_device_gpu_recover(ras->adev, 0); + atomic_set(&ras->in_recovery, 0); +} + +/* alloc/realloc bps array */ +static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, + struct ras_err_handler_data *data, int pages) +{ + unsigned int old_space = data->count + data->space_left; + unsigned int new_space = old_space + pages; + unsigned int align_space = ALIGN(new_space, 512); + void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + struct amdgpu_bo **bps_bo = + kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); + + if (!bps || !bps_bo) { + kfree(bps); + kfree(bps_bo); + return -ENOMEM; + } + + if (data->bps) { + memcpy(bps, data->bps, + data->count * sizeof(*data->bps)); + kfree(data->bps); + } + if (data->bps_bo) { + memcpy(bps_bo, data->bps_bo, + data->count * sizeof(*data->bps_bo)); + kfree(data->bps_bo); + } + + data->bps = bps; + data->bps_bo = bps_bo; + data->space_left += align_space - old_space; + return 0; +} + +/* it deal with vram only. 
*/ +int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, + struct eeprom_table_record *bps, int pages) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int ret = 0; + + if (!con || !con->eh_data || !bps || pages <= 0) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + if (data->space_left <= pages) + if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) { + ret = -ENOMEM; + goto out; + } + + memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); + data->count += pages; + data->space_left -= pages; + +out: + mutex_unlock(&con->recovery_lock); + + return ret; +} + +/* + * write error record array to eeprom, the function should be + * protected by recovery_lock + */ +static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + struct amdgpu_ras_eeprom_control *control; + int save_count; + + if (!con || !con->eh_data) + return 0; + + control = &con->eeprom_control; + data = con->eh_data; + save_count = data->count - control->num_recs; + /* only new entries are saved */ + if (save_count > 0) + if (amdgpu_ras_eeprom_process_recods(control, + &data->bps[control->num_recs], + true, + save_count)) { + DRM_ERROR("Failed to save EEPROM table data!"); + return -EIO; + } + + return 0; +} + +/* + * read error record array in eeprom and reserve enough space for + * storing new bad pages + */ +static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras_eeprom_control *control = + &adev->psp.ras.ras->eeprom_control; + struct eeprom_table_record *bps = NULL; + int ret = 0; + + /* no bad page record, skip eeprom access */ + if (!control->num_recs) + return ret; + + bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL); + if (!bps) + return -ENOMEM; + + if (amdgpu_ras_eeprom_process_recods(control, bps, false, + control->num_recs)) { + DRM_ERROR("Failed to load EEPROM table records!"); + ret = -EIO; + goto out; + } + + ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs); + +out: + kfree(bps); + return ret; +} + +/* + * check if an address belongs to bad page + * + * Note: this check is only for umc block + */ +static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, + uint64_t addr) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i; + bool ret = false; + + if (!con || !con->eh_data) + return ret; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + addr >>= AMDGPU_GPU_PAGE_SHIFT; + for (i = 0; i < data->count; i++) + if (addr == data->bps[i].retired_page) { + ret = true; + goto out; + } + +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + +/* called in gpu recovery/init */ +int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + uint64_t bp; + struct amdgpu_bo *bo = NULL; + int i, ret = 0; + + if (!con || !con->eh_data) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + /* reserve vram at driver post stage. 
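[Editor's illustration, not part of this patch.] The bad-page records above store page frame numbers, not byte addresses: amdgpu_ras_check_bad_page() compares addr >> AMDGPU_GPU_PAGE_SHIFT against retired_page, and reservation shifts back with << AMDGPU_GPU_PAGE_SHIFT. A hedged sketch of retiring a single faulty VRAM page follows; the helper name is hypothetical and the remaining eeprom_table_record fields are deliberately omitted.

static int my_retire_one_page(struct amdgpu_device *adev, uint64_t err_addr)
{
	struct eeprom_table_record bp = {
		/* records hold page frame numbers, not byte addresses */
		.retired_page = err_addr >> AMDGPU_GPU_PAGE_SHIFT,
		/* other record fields omitted in this sketch */
	};
	int r;

	/* appends to eh_data->bps under recovery_lock */
	r = amdgpu_ras_add_bad_pages(adev, &bp, 1);
	if (r)
		return r;

	/* pins the page in VRAM and saves any new records to the EEPROM */
	return amdgpu_ras_reserve_bad_pages(adev);
}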
*/ + for (i = data->last_reserved; i < data->count; i++) { + bp = data->bps[i].retired_page; + + /* There are two cases of reserve error should be ignored: + * 1) a ras bad page has been allocated (used by someone); + * 2) a ras bad page has been reserved (duplicate error injection + * for one page); + */ + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL)) + DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp); + + data->bps_bo[i] = bo; + data->last_reserved = i + 1; + bo = NULL; + } + + /* continue to save bad pages to eeprom even reesrve_vram fails */ + ret = amdgpu_ras_save_bad_pages(adev); +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + +/* called when driver unload */ +static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + struct amdgpu_bo *bo; + int i; + + if (!con || !con->eh_data) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + for (i = data->last_reserved - 1; i >= 0; i--) { + bo = data->bps_bo[i]; + + amdgpu_bo_free_kernel(&bo, NULL, NULL); + + data->bps_bo[i] = bo; + data->last_reserved = i; + } +out: + mutex_unlock(&con->recovery_lock); + return 0; +} + +int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data **data; + int ret; + + if (con) + data = &con->eh_data; + else + return 0; + + *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + if (!*data) { + ret = -ENOMEM; + goto out; + } + + mutex_init(&con->recovery_lock); + INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); + atomic_set(&con->in_recovery, 0); + con->adev = adev; + + ret = amdgpu_ras_eeprom_init(&con->eeprom_control); + if (ret) + goto free; + + if (con->eeprom_control.num_recs) { + ret = amdgpu_ras_load_bad_pages(adev); + if (ret) + goto free; + ret = amdgpu_ras_reserve_bad_pages(adev); + if (ret) + goto release; + } + + return 0; + +release: + amdgpu_ras_release_bad_pages(adev); +free: + kfree((*data)->bps); + kfree((*data)->bps_bo); + kfree(*data); + con->eh_data = NULL; +out: + DRM_WARN("Failed to initialize ras recovery!\n"); + + return ret; +} + +static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data = con->eh_data; + + /* recovery_init failed to init it, fini is useless */ + if (!data) + return 0; + + cancel_work_sync(&con->recovery_work); + amdgpu_ras_release_bad_pages(adev); + + mutex_lock(&con->recovery_lock); + con->eh_data = NULL; + kfree(data->bps); + kfree(data->bps_bo); + kfree(data); + mutex_unlock(&con->recovery_lock); + + return 0; +} +/* recovery end */ + +/* return 0 if ras will reset gpu and repost.*/ +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!ras) + return -EINVAL; + + ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET; + return 0; +} + +/* + * check hardware's ras ability which will be saved in hw_supported. + * if hardware does not support ras, we can skip some ras initializtion and + * forbid some ras operations from IP. + * if software itself, say boot parameter, limit the ras ability. We still + * need allow IP do some limited operations, like disable. In such case, + * we have to initialize ras as normal. 
but need check if operation is + * allowed or not in each function. + */ +static void amdgpu_ras_check_supported(struct amdgpu_device *adev, + uint32_t *hw_supported, uint32_t *supported) +{ + *hw_supported = 0; + *supported = 0; + + if (amdgpu_sriov_vf(adev) || + adev->asic_type != CHIP_VEGA20) + return; + + if (adev->is_atom_fw && + (amdgpu_atomfirmware_mem_ecc_supported(adev) || + amdgpu_atomfirmware_sram_ecc_supported(adev))) + *hw_supported = AMDGPU_RAS_BLOCK_MASK; + + *supported = amdgpu_ras_enable == 0 ? + 0 : *hw_supported & amdgpu_ras_mask; +} + +int amdgpu_ras_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int r; + + if (con) + return 0; + + con = kmalloc(sizeof(struct amdgpu_ras) + + sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT, + GFP_KERNEL|__GFP_ZERO); + if (!con) + return -ENOMEM; + + con->objs = (struct ras_manager *)(con + 1); + + amdgpu_ras_set_context(adev, con); + + amdgpu_ras_check_supported(adev, &con->hw_supported, + &con->supported); + if (!con->hw_supported) { + amdgpu_ras_set_context(adev, NULL); + kfree(con); + return 0; + } + + con->features = 0; + INIT_LIST_HEAD(&con->head); + /* Might need get this flag from vbios. */ + con->flags = RAS_DEFAULT_FLAGS; + + if (adev->nbio.funcs->init_ras_controller_interrupt) { + r = adev->nbio.funcs->init_ras_controller_interrupt(adev); + if (r) + return r; + } + + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { + r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); + if (r) + return r; + } + + amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; + + if (amdgpu_ras_fs_init(adev)) + goto fs_out; + + DRM_INFO("RAS INFO: ras initialized successfully, " + "hardware ability[%x] ras_mask[%x]\n", + con->hw_supported, con->supported); + return 0; +fs_out: + amdgpu_ras_set_context(adev, NULL); + kfree(con); + + return -EINVAL; +} + +/* helper function to handle common stuff in ip late init phase */ +int amdgpu_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_fs_if *fs_info, + struct ras_ih_if *ih_info) +{ + int r; + + /* disable RAS feature per IP block if it is not supported */ + if (!amdgpu_ras_is_supported(adev, ras_block->block)) { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); + return 0; + } + + r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1); + if (r) { + if (r == -EAGAIN) { + /* request gpu reset. 
will run again */ + amdgpu_ras_request_reset_on_boot(adev, + ras_block->block); + return 0; + } else if (adev->in_suspend || adev->in_gpu_reset) { + /* in resume phase, if fail to enable ras, + * clean up all ras fs nodes, and disable ras */ + goto cleanup; + } else + return r; + } + + /* in resume phase, no need to create ras fs node */ + if (adev->in_suspend || adev->in_gpu_reset) + return 0; + + if (ih_info->cb) { + r = amdgpu_ras_interrupt_add_handler(adev, ih_info); + if (r) + goto interrupt; + } + + amdgpu_ras_debugfs_create(adev, fs_info); + + r = amdgpu_ras_sysfs_create(adev, fs_info); + if (r) + goto sysfs; + + return 0; +cleanup: + amdgpu_ras_sysfs_remove(adev, ras_block); +sysfs: + amdgpu_ras_debugfs_remove(adev, ras_block); + if (ih_info->cb) + amdgpu_ras_interrupt_remove_handler(adev, ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, ras_block, 0); + return r; +} + +/* helper function to remove ras fs node and interrupt handler */ +void amdgpu_ras_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_ih_if *ih_info) +{ + if (!ras_block || !ih_info) + return; + + amdgpu_ras_sysfs_remove(adev, ras_block); + amdgpu_ras_debugfs_remove(adev, ras_block); + if (ih_info->cb) + amdgpu_ras_interrupt_remove_handler(adev, ih_info); + amdgpu_ras_feature_enable(adev, ras_block, 0); +} + +/* do some init work after IP late init as dependence. + * and it runs in resume/gpu reset/booting up cases. + */ +void amdgpu_ras_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + if (!con) + return; + + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { + /* Set up all other IPs which are not implemented. There is a + * tricky thing that IP's actual ras error type should be + * MULTI_UNCORRECTABLE, but as driver does not handle it, so + * ERROR_NONE make sense anyway. + */ + amdgpu_ras_enable_all_features(adev, 1); + + /* We enable ras on all hw_supported block, but as boot + * parameter might disable some of them and one or more IP has + * not implemented yet. So we disable them on behalf. + */ + list_for_each_entry_safe(obj, tmp, &con->head, node) { + if (!amdgpu_ras_is_supported(adev, obj->head.block)) { + amdgpu_ras_feature_enable(adev, &obj->head, 0); + /* there should be no any reference. */ + WARN_ON(alive_obj(obj)); + } + } + } + + if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) { + con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET; + /* setup ras obj state as disabled. + * for init_by_vbios case. + * if we want to enable ras, just enable it in a normal way. + * If we want do disable it, need setup ras obj as enabled, + * then issue another TA disable cmd. + * See feature_enable_on_boot + */ + amdgpu_ras_disable_all_features(adev, 1); + amdgpu_ras_reset_gpu(adev, 0); + } +} + +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. 
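[Editor's illustration, not part of this patch.] The amdgpu_ras_late_init()/amdgpu_ras_late_fini() helpers above centralize the per-IP boilerplate; a hedged sketch of how a block might call them follows. The my_gfx_* identifiers and node names are hypothetical; the structures and helper signatures are taken from this file.

static struct ras_common_if my_gfx_ras_if = {
	.block = AMDGPU_RAS_BLOCK__GFX,
	.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
	.sub_block_index = 0,
	.name = "gfx",
};

static int my_gfx_ras_late_init(struct amdgpu_device *adev)
{
	struct ras_fs_if fs_info = {
		.head = my_gfx_ras_if,
		.sysfs_name = "gfx_err_count",
		.debugfs_name = "gfx_err_inject",
	};
	struct ras_ih_if ih_info = {
		.head = my_gfx_ras_if,
		/* with cb == NULL, late_init skips interrupt registration */
		.cb = NULL,
	};

	/* enables (or, if unsupported, disables) the feature on boot,
	 * optionally adds an IH handler and creates sysfs/debugfs nodes */
	return amdgpu_ras_late_init(adev, &my_gfx_ras_if, &fs_info, &ih_info);
}

static void my_gfx_ras_late_fini(struct amdgpu_device *adev)
{
	struct ras_ih_if ih_info = {
		.head = my_gfx_ras_if,
		.cb = NULL,
	};

	amdgpu_ras_late_fini(adev, &my_gfx_ras_if, &ih_info);
}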
*/ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); +} + +/* do some fini work before IP fini as dependence */ +int amdgpu_ras_pre_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return 0; + + /* Need disable ras on all IPs here before ip [hw/sw]fini */ + amdgpu_ras_disable_all_features(adev, 0); + amdgpu_ras_recovery_fini(adev); + return 0; +} + +int amdgpu_ras_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return 0; + + amdgpu_ras_fs_fini(adev); + amdgpu_ras_interrupt_remove_all(adev); + + WARN(con->features, "Feature mask is not cleared"); + + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); + + amdgpu_ras_set_context(adev, NULL); + kfree(con); + + return 0; +} + +void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) +{ + uint32_t hw_supported, supported; + + amdgpu_ras_check_supported(adev, &hw_supported, &supported); + if (!hw_supported) + return; + + if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { + DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); + + amdgpu_ras_reset_gpu(adev, false); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h new file mode 100644 index 000000000000..f80fd3428c98 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -0,0 +1,623 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * + */ +#ifndef _AMDGPU_RAS_H +#define _AMDGPU_RAS_H + +#include <linux/debugfs.h> +#include <linux/list.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "ta_ras_if.h" +#include "amdgpu_ras_eeprom.h" + +enum amdgpu_ras_block { + AMDGPU_RAS_BLOCK__UMC = 0, + AMDGPU_RAS_BLOCK__SDMA, + AMDGPU_RAS_BLOCK__GFX, + AMDGPU_RAS_BLOCK__MMHUB, + AMDGPU_RAS_BLOCK__ATHUB, + AMDGPU_RAS_BLOCK__PCIE_BIF, + AMDGPU_RAS_BLOCK__HDP, + AMDGPU_RAS_BLOCK__XGMI_WAFL, + AMDGPU_RAS_BLOCK__DF, + AMDGPU_RAS_BLOCK__SMN, + AMDGPU_RAS_BLOCK__SEM, + AMDGPU_RAS_BLOCK__MP0, + AMDGPU_RAS_BLOCK__MP1, + AMDGPU_RAS_BLOCK__FUSE, + + AMDGPU_RAS_BLOCK__LAST +}; + +#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST +#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) + +enum amdgpu_ras_gfx_subblock { + /* CPC */ + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF */ + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, + AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + /* CPG */ + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + /* GDS */ + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI */ + AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ */ + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, + AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges) */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + 
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA */ + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA */ + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + 
AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI */ + AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP */ + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD */ + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank */ + AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker */ + AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + AMDGPU_RAS_BLOCK__GFX_MAX +}; + +enum amdgpu_ras_error_type { + AMDGPU_RAS_ERROR__NONE = 0, + AMDGPU_RAS_ERROR__PARITY = 1, + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, + AMDGPU_RAS_ERROR__POISON = 8, +}; + +enum amdgpu_ras_ret { + AMDGPU_RAS_SUCCESS = 0, + AMDGPU_RAS_FAIL, + AMDGPU_RAS_UE, + AMDGPU_RAS_CE, + AMDGPU_RAS_PT, +}; + +struct ras_common_if { + enum amdgpu_ras_block block; + enum amdgpu_ras_error_type type; + uint32_t sub_block_index; + /* block name */ + char name[32]; +}; + +struct amdgpu_ras { + /* ras infrastructure */ + /* for ras itself. */ + uint32_t hw_supported; + /* for IP to check its ras ability. 
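+	 * (hw_supported reflects what the hardware/firmware can report, while
+	 * supported is the subset of blocks currently enabled and exposed to
+	 * the IP blocks — an interpretation added here for clarity)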
*/ + uint32_t supported; + uint32_t features; + struct list_head head; + /* debugfs */ + struct dentry *dir; + /* sysfs */ + struct device_attribute features_attr; + struct bin_attribute badpages_attr; + /* block array */ + struct ras_manager *objs; + + /* gpu recovery */ + struct work_struct recovery_work; + atomic_t in_recovery; + struct amdgpu_device *adev; + /* error handler data */ + struct ras_err_handler_data *eh_data; + struct mutex recovery_lock; + + uint32_t flags; + bool reboot; + struct amdgpu_ras_eeprom_control eeprom_control; +}; + +struct ras_fs_data { + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_err_data { + unsigned long ue_count; + unsigned long ce_count; + unsigned long err_addr_cnt; + struct eeprom_table_record *err_addr; +}; + +struct ras_err_handler_data { + /* point to bad page records array */ + struct eeprom_table_record *bps; + /* point to reserved bo array */ + struct amdgpu_bo **bps_bo; + /* the count of entries */ + int count; + /* the space can place new entries */ + int space_left; + /* last reserved entry's index + 1 */ + int last_reserved; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); + +struct ras_ih_data { + /* interrupt bottom half */ + struct work_struct ih_work; + int inuse; + /* IP callback */ + ras_ih_cb cb; + /* full of entries */ + unsigned char *ring; + unsigned int ring_size; + unsigned int element_size; + unsigned int aligned_element_size; + unsigned int rptr; + unsigned int wptr; +}; + +struct ras_manager { + struct ras_common_if head; + /* reference count */ + int use; + /* ras block link */ + struct list_head node; + /* the device */ + struct amdgpu_device *adev; + /* debugfs */ + struct dentry *ent; + /* sysfs */ + struct device_attribute sysfs_attr; + int attr_inuse; + + /* fs node name */ + struct ras_fs_data fs_data; + + /* IH data */ + struct ras_ih_data ih_data; + + struct ras_err_data err_data; +}; + +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + +/* interfaces for IP */ +struct ras_fs_if { + struct ras_common_if head; + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_query_if { + struct ras_common_if head; + unsigned long ue_count; + unsigned long ce_count; +}; + +struct ras_inject_if { + struct ras_common_if head; + uint64_t address; + uint64_t value; +}; + +struct ras_cure_if { + struct ras_common_if head; + uint64_t address; +}; + +struct ras_ih_if { + struct ras_common_if head; + ras_ih_cb cb; +}; + +struct ras_dispatch_if { + struct ras_common_if head; + struct amdgpu_iv_entry *entry; +}; + +struct ras_debug_if { + union { + struct ras_common_if head; + struct ras_inject_if inject; + }; + int op; +}; +/* work flow + * vbios + * 1: ras feature enable (enabled by default) + * psp + * 2: ras framework init (in ip_init) + * IP + * 3: IH add + * 4: debugfs/sysfs create + * 5: query/inject + * 6: debugfs/sysfs remove + * 7: IH remove + * 8: feature disable + */ + +#define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras) +#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con)) + +/* check if ras is supported on block, say, sdma, gfx */ +static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (block >= AMDGPU_RAS_BLOCK_COUNT) + return 0; + return ras && (ras->supported & (1 << block)); +} + +int amdgpu_ras_recovery_init(struct amdgpu_device *adev); +int 
amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block); + +void amdgpu_ras_resume(struct amdgpu_device *adev); +void amdgpu_ras_suspend(struct amdgpu_device *adev); + +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, + bool is_ce); + +/* error handling functions */ +int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, + struct eeprom_table_record *bps, int pages); + +int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); + +static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, + bool is_baco) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + /* save bad page to eeprom before gpu reset, + * i2c may be unstable in gpu reset + */ + if (in_task()) + amdgpu_ras_reserve_bad_pages(adev); + + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) + schedule_work(&ras->recovery_work); + return 0; +} + +static inline enum ta_ras_block +amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { + switch (block) { + case AMDGPU_RAS_BLOCK__UMC: + return TA_RAS_BLOCK__UMC; + case AMDGPU_RAS_BLOCK__SDMA: + return TA_RAS_BLOCK__SDMA; + case AMDGPU_RAS_BLOCK__GFX: + return TA_RAS_BLOCK__GFX; + case AMDGPU_RAS_BLOCK__MMHUB: + return TA_RAS_BLOCK__MMHUB; + case AMDGPU_RAS_BLOCK__ATHUB: + return TA_RAS_BLOCK__ATHUB; + case AMDGPU_RAS_BLOCK__PCIE_BIF: + return TA_RAS_BLOCK__PCIE_BIF; + case AMDGPU_RAS_BLOCK__HDP: + return TA_RAS_BLOCK__HDP; + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + return TA_RAS_BLOCK__XGMI_WAFL; + case AMDGPU_RAS_BLOCK__DF: + return TA_RAS_BLOCK__DF; + case AMDGPU_RAS_BLOCK__SMN: + return TA_RAS_BLOCK__SMN; + case AMDGPU_RAS_BLOCK__SEM: + return TA_RAS_BLOCK__SEM; + case AMDGPU_RAS_BLOCK__MP0: + return TA_RAS_BLOCK__MP0; + case AMDGPU_RAS_BLOCK__MP1: + return TA_RAS_BLOCK__MP1; + case AMDGPU_RAS_BLOCK__FUSE: + return TA_RAS_BLOCK__FUSE; + default: + WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); + return TA_RAS_BLOCK__UMC; + } +} + +static inline enum ta_ras_error_type +amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { + switch (error) { + case AMDGPU_RAS_ERROR__NONE: + return TA_RAS_ERROR__NONE; + case AMDGPU_RAS_ERROR__PARITY: + return TA_RAS_ERROR__PARITY; + case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE: + return TA_RAS_ERROR__SINGLE_CORRECTABLE; + case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE: + return TA_RAS_ERROR__MULTI_UNCORRECTABLE; + case AMDGPU_RAS_ERROR__POISON: + return TA_RAS_ERROR__POISON; + default: + WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error); + return TA_RAS_ERROR__NONE; + } +} + +/* called in ip_init and ip_fini */ +int amdgpu_ras_init(struct amdgpu_device *adev); +int amdgpu_ras_fini(struct amdgpu_device *adev); +int amdgpu_ras_pre_fini(struct amdgpu_device *adev); +int amdgpu_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_fs_if *fs_info, + struct ras_ih_if *ih_info); +void amdgpu_ras_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_ih_if *ih_info); + +int amdgpu_ras_feature_enable(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable); + +int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable); + +int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head); + +int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head); + +void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head); + +void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, + 
struct ras_common_if *head); + +int amdgpu_ras_error_query(struct amdgpu_device *adev, + struct ras_query_if *info); + +int amdgpu_ras_error_inject(struct amdgpu_device *adev, + struct ras_inject_if *info); + +int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, + struct ras_ih_if *info); + +int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, + struct ras_ih_if *info); + +int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, + struct ras_dispatch_if *info); + +extern atomic_t amdgpu_ras_in_intr; + +static inline bool amdgpu_ras_intr_triggered(void) +{ + return !!atomic_read(&amdgpu_ras_in_intr); +} + +void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c new file mode 100644 index 000000000000..7de16c0c2f20 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -0,0 +1,518 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_ras_eeprom.h" +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include <linux/bits.h> +#include "smu_v11_0_i2c.h" + +#define EEPROM_I2C_TARGET_ADDR 0xA0 + +/* + * The 2 macros bellow represent the actual size in bytes that + * those entities occupy in the EEPROM memory. + * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which + * uses uint64 to store 6b fields such as retired_page. 
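+ * In EEPROM a record is packed as err_type(1) + bank(1) + ts(8) +
+ * offset(6) + mem_channel(1) + mcumc_id(1) + retired_page(6) = 24 bytes,
+ * and the header is five 32-bit words = 20 bytes (see the encode/decode
+ * helpers below).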
+ */ +#define EEPROM_TABLE_HEADER_SIZE 20 +#define EEPROM_TABLE_RECORD_SIZE 24 + +#define EEPROM_ADDRESS_SIZE 0x2 + +/* Table hdr is 'AMDR' */ +#define EEPROM_TABLE_HDR_VAL 0x414d4452 +#define EEPROM_TABLE_VER 0x00010000 + +/* Assume 2 Mbit size */ +#define EEPROM_SIZE_BYTES 256000 +#define EEPROM_PAGE__SIZE_BYTES 256 +#define EEPROM_HDR_START 0 +#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) +#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) +#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev + +static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buff) +{ + uint32_t *pp = (uint32_t *) buff; + + pp[0] = cpu_to_le32(hdr->header); + pp[1] = cpu_to_le32(hdr->version); + pp[2] = cpu_to_le32(hdr->first_rec_offset); + pp[3] = cpu_to_le32(hdr->tbl_size); + pp[4] = cpu_to_le32(hdr->checksum); +} + +static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buff) +{ + uint32_t *pp = (uint32_t *)buff; + + hdr->header = le32_to_cpu(pp[0]); + hdr->version = le32_to_cpu(pp[1]); + hdr->first_rec_offset = le32_to_cpu(pp[2]); + hdr->tbl_size = le32_to_cpu(pp[3]); + hdr->checksum = le32_to_cpu(pp[4]); +} + +static int __update_table_header(struct amdgpu_ras_eeprom_control *control, + unsigned char *buff) +{ + int ret = 0; + struct i2c_msg msg = { + .addr = EEPROM_I2C_TARGET_ADDR, + .flags = 0, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + + *(uint16_t *)buff = EEPROM_HDR_START; + __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); + if (ret < 1) + DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); + + return ret; +} + + + +static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) +{ + int i; + uint32_t tbl_sum = 0; + + /* Header checksum, skip checksum field in the calculation */ + for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) + tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); + + return tbl_sum; +} + +static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, + int num) +{ + int i, j; + uint32_t tbl_sum = 0; + + /* Records checksum */ + for (i = 0; i < num; i++) { + struct eeprom_table_record *record = &records[i]; + + for (j = 0; j < sizeof(*record); j++) { + tbl_sum += *(((unsigned char *)record) + j); + } + } + + return tbl_sum; +} + +static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num) +{ + return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); +} + +/* Checksum = 256 -((sum of all table entries) mod 256) */ +static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num, + uint32_t old_hdr_byte_sum) +{ + /* + * This will update the table sum with new records. + * + * TODO: What happens when the EEPROM table is to be wrapped around + * and old records from start will get overridden. 
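+	 *
+	 * Example: with a running byte sum of 0x1a7 the stored checksum is
+	 * 256 - (0x1a7 % 256) = 0x59, so that on validation
+	 * checksum + (byte sum % 256) == 256.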
+ */ + + /* need to recalculate updated header byte sum */ + control->tbl_byte_sum -= old_hdr_byte_sum; + control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); + + control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); +} + +/* table sum mod 256 + checksum must equals 256 */ +static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num) +{ + control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); + + if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { + DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); + return false; + } + + return true; +} + +int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) +{ + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + int ret = 0; + + mutex_lock(&control->tbl_mutex); + + hdr->header = EEPROM_TABLE_HDR_VAL; + hdr->version = EEPROM_TABLE_VER; + hdr->first_rec_offset = EEPROM_RECORD_START; + hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; + + control->tbl_byte_sum = 0; + __update_tbl_checksum(control, NULL, 0, 0); + control->next_addr = EEPROM_RECORD_START; + + ret = __update_table_header(control, buff); + + mutex_unlock(&control->tbl_mutex); + + return ret; + +} + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) +{ + int ret = 0; + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct i2c_msg msg = { + .addr = EEPROM_I2C_TARGET_ADDR, + .flags = I2C_M_RD, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + mutex_init(&control->tbl_mutex); + + switch (adev->asic_type) { + case CHIP_VEGA20: + ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); + break; + + case CHIP_ARCTURUS: + ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor); + break; + + default: + return 0; + } + + if (ret) { + DRM_ERROR("Failed to init I2C controller, ret:%d", ret); + return ret; + } + + /* Read/Create table header from EEPROM address 0 */ + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); + if (ret < 1) { + DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); + return ret; + } + + __decode_table_header_from_buff(hdr, &buff[2]); + + if (hdr->header == EEPROM_TABLE_HDR_VAL) { + control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / + EEPROM_TABLE_RECORD_SIZE; + control->tbl_byte_sum = __calc_hdr_byte_sum(control); + control->next_addr = EEPROM_RECORD_START; + + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", + control->num_recs); + + } else { + DRM_INFO("Creating new EEPROM table"); + + ret = amdgpu_ras_eeprom_reset_table(control); + } + + return ret == 1 ? 
0 : -EIO; +} + +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + switch (adev->asic_type) { + case CHIP_VEGA20: + smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor); + break; + case CHIP_ARCTURUS: + smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor); + break; + + default: + return; + } +} + +static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buff) +{ + __le64 tmp = 0; + int i = 0; + + /* Next are all record fields according to EEPROM page spec in LE foramt */ + buff[i++] = record->err_type; + + buff[i++] = record->bank; + + tmp = cpu_to_le64(record->ts); + memcpy(buff + i, &tmp, 8); + i += 8; + + tmp = cpu_to_le64((record->offset & 0xffffffffffff)); + memcpy(buff + i, &tmp, 6); + i += 6; + + buff[i++] = record->mem_channel; + buff[i++] = record->mcumc_id; + + tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); + memcpy(buff + i, &tmp, 6); +} + +static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buff) +{ + __le64 tmp = 0; + int i = 0; + + /* Next are all record fields according to EEPROM page spec in LE foramt */ + record->err_type = buff[i++]; + + record->bank = buff[i++]; + + memcpy(&tmp, buff + i, 8); + record->ts = le64_to_cpu(tmp); + i += 8; + + memcpy(&tmp, buff + i, 6); + record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); + i += 6; + + record->mem_channel = buff[i++]; + record->mcumc_id = buff[i++]; + + memcpy(&tmp, buff + i, 6); + record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); +} + +/* + * When reaching end of EEPROM memory jump back to 0 record address + * When next record access will go beyond EEPROM page boundary modify bits A17/A8 + * in I2C selector to go to next page + */ +static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) +{ + uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; + + /* When all EEPROM memory used jump back to 0 address */ + if (next_address > EEPROM_SIZE_BYTES) { + DRM_INFO("Reached end of EEPROM memory, jumping to 0 " + "and overriding old record"); + return EEPROM_RECORD_START; + } + + /* + * To check if we overflow page boundary compare next address with + * current and see if bits 17/8 of the EEPROM address will change + * If they do start from the next 256b page + * + * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 
5.1.2 + */ + if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { + DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", + (next_address & EEPROM_ADDR_MSB_MASK)); + + return (next_address & EEPROM_ADDR_MSB_MASK); + } + + return curr_address; +} + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, + bool write, + int num) +{ + int i, ret = 0; + struct i2c_msg *msgs, *msg; + unsigned char *buffs, *buff; + struct eeprom_table_record *record; + struct amdgpu_device *adev = to_amdgpu_device(control); + + if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS) + return 0; + + buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, + GFP_KERNEL); + if (!buffs) + return -ENOMEM; + + mutex_lock(&control->tbl_mutex); + + msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); + if (!msgs) { + ret = -ENOMEM; + goto free_buff; + } + + /* In case of overflow just start from beginning to not lose newest records */ + if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES)) + control->next_addr = EEPROM_RECORD_START; + + + /* + * TODO Currently makes EEPROM writes for each record, this creates + * internal fragmentation. Optimized the code to do full page write of + * 256b + */ + for (i = 0; i < num; i++) { + buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + record = &records[i]; + msg = &msgs[i]; + + control->next_addr = __correct_eeprom_dest_address(control->next_addr); + + /* + * Update bits 16,17 of EEPROM address in I2C address by setting them + * to bits 1,2 of Device address byte + */ + msg->addr = EEPROM_I2C_TARGET_ADDR | + ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); + msg->flags = write ? 0 : I2C_M_RD; + msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; + msg->buf = buff; + + /* Insert the EEPROM dest addess, bits 0-15 */ + buff[0] = ((control->next_addr >> 8) & 0xff); + buff[1] = (control->next_addr & 0xff); + + /* EEPROM table content is stored in LE format */ + if (write) + __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE); + + /* + * The destination EEPROM address might need to be corrected to account + * for page or entire memory wrapping + */ + control->next_addr += EEPROM_TABLE_RECORD_SIZE; + } + + ret = i2c_transfer(&control->eeprom_accessor, msgs, num); + if (ret < 1) { + DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); + + /* TODO Restore prev next EEPROM address ? 
*/ + goto free_msgs; + } + + + if (!write) { + for (i = 0; i < num; i++) { + buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + record = &records[i]; + + __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); + } + } + + if (write) { + uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control); + + /* + * Update table header with size and CRC and account for table + * wrap around where the assumption is that we treat it as empty + * table + * + * TODO - Check the assumption is correct + */ + control->num_recs += num; + control->num_recs %= EEPROM_MAX_RECORD_NUM; + control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num; + if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES) + control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE + + control->num_recs * EEPROM_TABLE_RECORD_SIZE; + + __update_tbl_checksum(control, records, num, old_hdr_byte_sum); + + __update_table_header(control, buffs); + } else if (!__validate_tbl_checksum(control, records, num)) { + DRM_WARN("EEPROM Table checksum mismatch!"); + /* TODO Uncomment when EEPROM read/write is relliable */ + /* ret = -EIO; */ + } + +free_msgs: + kfree(msgs); + +free_buff: + kfree(buffs); + + mutex_unlock(&control->tbl_mutex); + + return ret == num ? 0 : -EIO; +} + +/* Used for testing if bugs encountered */ +#if 0 +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) +{ + int i; + struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); + + if (!recs) + return; + + for (i = 0; i < 1 ; i++) { + recs[i].address = 0xdeadbeef; + recs[i].retired_page = i; + } + + if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { + + memset(recs, 0, sizeof(*recs) * 1); + + control->next_addr = EEPROM_RECORD_START; + + if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { + for (i = 0; i < 1; i++) + DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", + recs[i].address, recs[i].retired_page); + } else + DRM_ERROR("Failed in reading from table"); + + } else + DRM_ERROR("Failed in writing to table"); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h new file mode 100644 index 000000000000..622269957c1b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -0,0 +1,91 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef _AMDGPU_RAS_EEPROM_H +#define _AMDGPU_RAS_EEPROM_H + +#include <linux/i2c.h> + +struct amdgpu_device; + +enum amdgpu_ras_eeprom_err_type{ + AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, + AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, + AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE +}; + +struct amdgpu_ras_eeprom_table_header { + uint32_t header; + uint32_t version; + uint32_t first_rec_offset; + uint32_t tbl_size; + uint32_t checksum; +}__attribute__((__packed__)); + +struct amdgpu_ras_eeprom_control { + struct amdgpu_ras_eeprom_table_header tbl_hdr; + struct i2c_adapter eeprom_accessor; + uint32_t next_addr; + unsigned int num_recs; + struct mutex tbl_mutex; + bool bus_locked; + uint32_t tbl_byte_sum; +}; + +/* + * Represents single table record. Packed to be easily serialized into byte + * stream. + */ +struct eeprom_table_record { + + union { + uint64_t address; + uint64_t offset; + }; + + uint64_t retired_page; + uint64_t ts; + + enum amdgpu_ras_eeprom_err_type err_type; + + union { + unsigned char bank; + unsigned char cu; + }; + + unsigned char mem_channel; + unsigned char mcumc_id; +}__attribute__((__packed__)); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); +int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, + bool write, + int num); + +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); + +#endif // _AMDGPU_RAS_EEPROM_H diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 335a0edf114b..e5c83e164d82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -28,8 +28,9 @@ */ #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/debugfs.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atom.h" @@ -248,6 +249,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + else if (ring == &adev->sdma.instance[0].page) + sched_hw_submission = 256; if (ring->adev == NULL) { if (adev->num_rings >= AMDGPU_MAX_RINGS) @@ -279,6 +282,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } + ring->trail_fence_gpu_addr = + adev->wb.gpu_addr + (ring->trail_fence_offs * 4); + ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs]; + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); if (r) { dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); @@ -397,7 +410,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); - if (!ring->funcs->soft_recovery || !fence) + if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; atomic_inc(&ring->adev->gpu_reset_counter); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d7fae2676269..930316e60155 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,8 +29,8 @@ #include 
<drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 23 -#define AMDGPU_MAX_GFX_RINGS 1 +#define AMDGPU_MAX_RINGS 28 +#define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 @@ -114,6 +114,7 @@ struct amdgpu_ring_funcs { uint32_t align_mask; u32 nop; bool support_64bit_ptrs; + bool no_user_fence; unsigned vmhub; unsigned extra_dw; @@ -171,6 +172,7 @@ struct amdgpu_ring_funcs { enum drm_sched_priority priority); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); + int (*preempt_ib)(struct amdgpu_ring *ring); }; struct amdgpu_ring { @@ -205,6 +207,10 @@ struct amdgpu_ring { unsigned fence_offs; uint64_t current_ctx; char name[16]; + u32 trail_seq; + unsigned trail_fence_offs; + u64 trail_fence_gpu_addr; + volatile u32 *trail_fence_cpu_addr; unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; @@ -245,6 +251,7 @@ struct amdgpu_ring { #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) +#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); @@ -265,6 +272,12 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, struct dma_fence *fence); +static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring, + bool cond_exec) +{ + *ring->cond_exe_cpu_addr = cond_exec; +} + static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 49a8ab52113b..d3d4707f2168 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -26,6 +26,94 @@ #include "clearstate_defs.h" +/* firmware ID used in rlc toc */ +typedef enum _FIRMWARE_ID_ { + FIRMWARE_ID_INVALID = 0, + FIRMWARE_ID_RLC_G_UCODE = 1, + FIRMWARE_ID_RLC_TOC = 2, + FIRMWARE_ID_RLCG_SCRATCH = 3, + FIRMWARE_ID_RLC_SRM_ARAM = 4, + FIRMWARE_ID_RLC_SRM_INDEX_ADDR = 5, + FIRMWARE_ID_RLC_SRM_INDEX_DATA = 6, + FIRMWARE_ID_RLC_P_UCODE = 7, + FIRMWARE_ID_RLC_V_UCODE = 8, + FIRMWARE_ID_RLX6_UCODE = 9, + FIRMWARE_ID_RLX6_DRAM_BOOT = 10, + FIRMWARE_ID_GLOBAL_TAP_DELAYS = 11, + FIRMWARE_ID_SE0_TAP_DELAYS = 12, + FIRMWARE_ID_SE1_TAP_DELAYS = 13, + FIRMWARE_ID_GLOBAL_SE0_SE1_SKEW_DELAYS = 14, + FIRMWARE_ID_SDMA0_UCODE = 15, + FIRMWARE_ID_SDMA0_JT = 16, + FIRMWARE_ID_SDMA1_UCODE = 17, + FIRMWARE_ID_SDMA1_JT = 18, + FIRMWARE_ID_CP_CE = 19, + FIRMWARE_ID_CP_PFP = 20, + FIRMWARE_ID_CP_ME = 21, + FIRMWARE_ID_CP_MEC = 22, + FIRMWARE_ID_CP_MES = 23, + FIRMWARE_ID_MES_STACK = 24, + FIRMWARE_ID_RLC_SRM_DRAM_SR = 25, + FIRMWARE_ID_RLCG_SCRATCH_SR = 26, + FIRMWARE_ID_RLCP_SCRATCH_SR = 27, + FIRMWARE_ID_RLCV_SCRATCH_SR = 28, + FIRMWARE_ID_RLX6_DRAM_SR = 29, + FIRMWARE_ID_SDMA0_PG_CONTEXT = 30, + FIRMWARE_ID_SDMA1_PG_CONTEXT = 31, + FIRMWARE_ID_GLOBAL_MUX_SELECT_RAM = 32, + FIRMWARE_ID_SE0_MUX_SELECT_RAM = 33, + FIRMWARE_ID_SE1_MUX_SELECT_RAM = 34, + FIRMWARE_ID_ACCUM_CTRL_RAM = 35, + FIRMWARE_ID_RLCP_CAM = 36, + FIRMWARE_ID_RLC_SPP_CAM_EXT = 37, + FIRMWARE_ID_MAX = 38, +} FIRMWARE_ID; + +typedef struct _RLC_TABLE_OF_CONTENT { 
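+	/*
+	 * Each TOC entry is four DWORDs: DW0 packs the firmware id and its
+	 * offset, DW1 carries the load/save flags and the image size, and
+	 * DW2/DW3 hold the indirect address/data register pair and the
+	 * associated index/start offset.
+	 */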
+ union { + unsigned int DW0; + struct { + unsigned int offset : 25; + unsigned int id : 7; + }; + }; + + union { + unsigned int DW1; + struct { + unsigned int load_at_boot : 1; + unsigned int load_at_vddgfx : 1; + unsigned int load_at_reset : 1; + unsigned int memory_destination : 2; + unsigned int vfflr_image_code : 4; + unsigned int load_mode_direct : 1; + unsigned int save_for_vddgfx : 1; + unsigned int save_for_vfflr : 1; + unsigned int reserved : 1; + unsigned int signed_source : 1; + unsigned int size : 18; + }; + }; + + union { + unsigned int DW2; + struct { + unsigned int indirect_addr_reg : 16; + unsigned int index : 16; + }; + }; + + union { + unsigned int DW3; + struct { + unsigned int indirect_data_reg : 16; + unsigned int indirect_start_offset : 16; + }; + }; +} RLC_TABLE_OF_CONTENT; + +#define RLC_TOC_MAX_SIZE 64 + struct amdgpu_rlc_funcs { bool (*is_rlc_enabled)(struct amdgpu_device *adev); void (*set_safe_mode)(struct amdgpu_device *adev); @@ -85,6 +173,16 @@ struct amdgpu_rlc { u8 *save_restore_list_srm; bool is_rlc_v2_1; + + /* for rlc autoload */ + struct amdgpu_bo *rlc_autoload_bo; + u64 rlc_autoload_gpu_addr; + void *rlc_autoload_ptr; + + /* rlc toc buffer */ + struct amdgpu_bo *rlc_toc_bo; + uint64_t rlc_toc_gpu_addr; + void *rlc_toc_buf; }; void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index bfaf5c6323be..0bd1d4ffc19e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -41,7 +41,7 @@ * If we are asked to block we wait on all the oldest fence of all * rings. We just wait for any of those fence to complete. */ -#include <drm/drmP.h> + #include "amdgpu.h" static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0767a93e4d91..c799691dfa84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -23,8 +23,11 @@ */ #include <linux/fdtable.h> +#include <linux/file.h> #include <linux/pid.h> + #include <drm/amdgpu_drm.h> + #include "amdgpu.h" #include "amdgpu_vm.h" @@ -53,26 +56,25 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, int fd, enum drm_sched_priority priority) { - struct file *filp = fget(fd); + struct fd f = fdget(fd); struct amdgpu_fpriv *fpriv; struct amdgpu_ctx *ctx; uint32_t id; int r; - if (!filp) + if (!f.file) return -EINVAL; - r = amdgpu_file_to_fpriv(filp, &fpriv); + r = amdgpu_file_to_fpriv(f.file, &fpriv); if (r) { - fput(filp); + fdput(f); return r; } idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) amdgpu_ctx_priority_override(ctx, priority); - fput(filp); - + fdput(f); return 0; } @@ -81,30 +83,30 @@ static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev, unsigned ctx_id, enum drm_sched_priority priority) { - struct file *filp = fget(fd); + struct fd f = fdget(fd); struct amdgpu_fpriv *fpriv; struct amdgpu_ctx *ctx; int r; - if (!filp) + if (!f.file) return -EINVAL; - r = amdgpu_file_to_fpriv(filp, &fpriv); + r = amdgpu_file_to_fpriv(f.file, &fpriv); if (r) { - fput(filp); + fdput(f); return r; } ctx = amdgpu_ctx_get(fpriv, ctx_id); if (!ctx) { - fput(filp); + fdput(f); return -EINVAL; } amdgpu_ctx_priority_override(ctx, priority); amdgpu_ctx_put(ctx); - fput(filp); + fdput(f); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h index 2a1a0c734bdd..12299fd95691 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h @@ -25,7 +25,10 @@ #ifndef __AMDGPU_SCHED_H__ #define __AMDGPU_SCHED_H__ -#include <drm/drmP.h> +enum drm_sched_priority; + +struct drm_device; +struct drm_file; enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); int amdgpu_sched_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 115bb0c99b0f..6010999d9020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -20,9 +20,14 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" + +#define AMDGPU_CSA_SDMA_SIZE 64 +/* SDMA CSA reside in the 3rd page of CSA */ +#define AMDGPU_CSA_SDMA_OFFSET (4096 * 2) /* * GPU SDMA IP block helpers function. @@ -56,3 +61,124 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) return -EINVAL; } + +uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint64_t csa_mc_addr; + uint32_t index = 0; + int r; + + if (vmid == 0 || !amdgpu_mcbp) + return 0; + + r = amdgpu_sdma_get_index_from_ring(ring, &index); + + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; + + return csa_mc_addr; +} + +int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, + void *ras_ih_info) +{ + int r, i; + struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; + struct ras_fs_if fs_info = { + .sysfs_name = "sdma_err_count", + .debugfs_name = "sdma_err_inject", + }; + + if (!ih_info) + return -EINVAL; + + if (!adev->sdma.ras_if) { + adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->sdma.ras_if) + return -ENOMEM; + adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; + adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->sdma.ras_if->sub_block_index = 0; + strcpy(adev->sdma.ras_if->name, "sdma"); + } + fs_info.head = ih_info->head = *adev->sdma.ras_if; + + r = amdgpu_ras_late_init(adev, adev->sdma.ras_if, + &fs_info, ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (r) + goto late_fini; + } + } else { + r = 0; + goto free; + } + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); +free: + kfree(adev->sdma.ras_if); + adev->sdma.ras_if = NULL; + return r; +} + +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && + adev->sdma.ras_if) { + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + /* the cb member will not be used by + * amdgpu_ras_interrupt_remove_handler, init it only + * to cheat the check in ras_late_fini + */ + .cb = amdgpu_sdma_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev, 0); + + 
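+	/*
+	 * The SRAM ECC error has been flagged to KFD and a GPU reset has
+	 * been scheduled above, so the bottom half is done; report the
+	 * interrupt as handled.
+	 */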
return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 16b1a6ae5ba6..761ff8be6314 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -25,12 +25,17 @@ #define __AMDGPU_SDMA_H__ /* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES 2 +#define AMDGPU_MAX_SDMA_INSTANCES 8 enum amdgpu_sdma_irq { - AMDGPU_SDMA_IRQ_TRAP0 = 0, - AMDGPU_SDMA_IRQ_TRAP1, - + AMDGPU_SDMA_IRQ_INSTANCE0 = 0, + AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_SDMA_IRQ_INSTANCE2, + AMDGPU_SDMA_IRQ_INSTANCE3, + AMDGPU_SDMA_IRQ_INSTANCE4, + AMDGPU_SDMA_IRQ_INSTANCE5, + AMDGPU_SDMA_IRQ_INSTANCE6, + AMDGPU_SDMA_IRQ_INSTANCE7, AMDGPU_SDMA_IRQ_LAST }; @@ -49,9 +54,11 @@ struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; + struct amdgpu_irq_src ecc_irq; int num_instances; uint32_t srbm_soft_reset; bool has_page_queue; + struct ras_common_if *ras_if; }; /* @@ -96,5 +103,14 @@ struct amdgpu_buffer_funcs { struct amdgpu_sdma_instance * amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); - +uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); +int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, + void *ras_ih_info); +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); +int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); +int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h new file mode 100644 index 000000000000..f4176cb01790 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h @@ -0,0 +1,82 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_SOCBB_H__ +#define __AMDGPU_SOCBB_H__ + +struct gpu_info_voltage_scaling_v1_0 { + uint32_t state; + uint32_t dscclk_mhz; + uint32_t dcfclk_mhz; + uint32_t socclk_mhz; + uint32_t dram_speed_mts; + uint32_t fabricclk_mhz; + uint32_t dispclk_mhz; + uint32_t phyclk_mhz; + uint32_t dppclk_mhz; +}; + +struct gpu_info_soc_bounding_box_v1_0 { + uint32_t sr_exit_time_us; + uint32_t sr_enter_plus_exit_time_us; + uint32_t urgent_latency_us; + uint32_t urgent_latency_pixel_data_only_us; + uint32_t urgent_latency_pixel_mixed_with_vm_data_us; + uint32_t urgent_latency_vm_data_only_us; + uint32_t writeback_latency_us; + uint32_t ideal_dram_bw_after_urgent_percent; + uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly + uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm; + uint32_t pct_ideal_dram_sdp_bw_after_urgent_vm_only; + uint32_t max_avg_sdp_bw_use_normal_percent; + uint32_t max_avg_dram_bw_use_normal_percent; + uint32_t max_request_size_bytes; + uint32_t downspread_percent; + uint32_t dram_page_open_time_ns; + uint32_t dram_rw_turnaround_time_ns; + uint32_t dram_return_buffer_per_channel_bytes; + uint32_t dram_channel_width_bytes; + uint32_t fabric_datapath_to_dcn_data_return_bytes; + uint32_t dcn_downspread_percent; + uint32_t dispclk_dppclk_vco_speed_mhz; + uint32_t dfs_vco_period_ps; + uint32_t urgent_out_of_order_return_per_channel_pixel_only_bytes; + uint32_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + uint32_t urgent_out_of_order_return_per_channel_vm_only_bytes; + uint32_t round_trip_ping_latency_dcfclk_cycles; + uint32_t urgent_out_of_order_return_per_channel_bytes; + uint32_t channel_interleave_bytes; + uint32_t num_banks; + uint32_t num_chans; + uint32_t vmm_page_size_bytes; + uint32_t dram_clock_change_latency_us; + uint32_t writeback_dram_clock_change_latency_us; + uint32_t return_bus_width_bytes; + uint32_t voltage_override; + uint32_t xfc_bus_transport_time_us; + uint32_t xfc_xbuf_latency_tolerance_us; + uint32_t use_urgent_burst_bw; + uint32_t num_states; + struct gpu_info_voltage_scaling_v1_0 clock_limits[8]; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 2d6f5ec77a68..95e5e93edd18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -28,7 +28,6 @@ * Christian König <christian.koenig@amd.com> */ -#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" @@ -191,10 +190,10 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct reservation_object *resv, + struct dma_resv *resv, void *owner, bool explicit_sync) { - struct reservation_object_list *flist; + struct dma_resv_list *flist; struct dma_fence *f; void *fence_owner; unsigned i; @@ -204,16 +203,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, return -EINVAL; /* always sync to the exclusive fence */ - f = reservation_object_get_excl(resv); + f = dma_resv_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f, false); - flist = reservation_object_get_list(resv); + flist = dma_resv_get_list(resv); if (!flist || r) return r; for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(resv)); + dma_resv_held(resv)); /* We only want to trigger KFD eviction fences on * evict or move 
jobs. Skip KFD fences otherwise. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 10cf23a57f17..b5f1778a2319 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -27,7 +27,7 @@ #include <linux/hashtable.h> struct dma_fence; -struct reservation_object; +struct dma_resv; struct amdgpu_device; struct amdgpu_ring; @@ -44,7 +44,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_fence *f, bool explicit); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct reservation_object *resv, + struct dma_resv *resv, void *owner, bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 8904e62dca7a..b158230af8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -22,7 +22,7 @@ * * Authors: Michel Dänzer */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_uvd.h" @@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index d3ca2424b5fe..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -28,8 +28,6 @@ #include <linux/types.h> #include <linux/tracepoint.h> -#include <drm/drmP.h> - #undef TRACE_SYSTEM #define TRACE_SYSTEM amdgpu #define TRACE_INCLUDE_FILE amdgpu_trace @@ -172,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, __field(unsigned int, context) __field(unsigned int, seqno) __field(struct dma_fence *, fence) - __field(char *, ring_name) + __string(ring, to_amdgpu_ring(job->base.sched)->name) __field(u32, num_ibs) ), @@ -181,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; + __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", __entry->sched_job_id, __get_str(timeline), __entry->context, - __entry->seqno, __entry->ring_name, __entry->num_ibs) + __entry->seqno, __get_str(ring), __entry->num_ibs) ); TRACE_EVENT(amdgpu_sched_run_job, @@ -197,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job, __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __field(unsigned int, context) __field(unsigned int, seqno) - __field(char *, ring_name) + __string(ring, to_amdgpu_ring(job->base.sched)->name) __field(u32, num_ibs) ), @@ -206,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; + __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, 
context=%u, seqno=%u, ring_name=%s, num_ibs=%u", __entry->sched_job_id, __get_str(timeline), __entry->context, - __entry->seqno, __entry->ring_name, __entry->num_ibs) + __entry->seqno, __get_str(ring), __entry->num_ibs) ); @@ -325,14 +323,15 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags), - TP_ARGS(pe, addr, count, incr, flags), + uint32_t incr, uint64_t flags, bool direct), + TP_ARGS(pe, addr, count, incr, flags, direct), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) __field(u64, flags) + __field(bool, direct) ), TP_fast_assign( @@ -341,28 +340,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->count = count; __entry->incr = incr; __entry->flags = flags; + __entry->direct = direct; ), - TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u", - __entry->pe, __entry->addr, __entry->incr, - __entry->flags, __entry->count) + TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " + "direct=%d", __entry->pe, __entry->addr, __entry->incr, + __entry->flags, __entry->count, __entry->direct) ); TRACE_EVENT(amdgpu_vm_copy_ptes, - TP_PROTO(uint64_t pe, uint64_t src, unsigned count), - TP_ARGS(pe, src, count), + TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), + TP_ARGS(pe, src, count, direct), TP_STRUCT__entry( __field(u64, pe) __field(u64, src) __field(u32, count) + __field(bool, direct) ), TP_fast_assign( __entry->pe = pe; __entry->src = src; __entry->count = count; + __entry->direct = direct; ), - TP_printk("pe=%010Lx, src=%010Lx, count=%u", - __entry->pe, __entry->src, __entry->count) + TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", + __entry->pe, __entry->src, __entry->count, + __entry->direct) ); TRACE_EVENT(amdgpu_vm_flush, @@ -470,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __field(const char *,name) + __string(ring, sched_job->base.sched->name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) @@ -478,14 +481,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, ), TP_fast_assign( - __entry->name = sched_job->base.sched->name; + __assign_str(ring, sched_job->base.sched->name) __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; __entry->seqno = fence->seqno; ), TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u", - __entry->name, __entry->id, + __get_str(ring), __entry->id, __entry->fence, __entry->ctx, __entry->seqno) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c index f212402570a5..57c6c39ba064 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c @@ -21,7 +21,7 @@ * * Author : Dave Airlie <airlied@redhat.com> */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 73e71e61dc99..2616e2eafdeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -29,29 +29,36 @@ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> * Dave Airlie */ + +#include <linux/dma-mapping.h> +#include <linux/iommu.h> +#include <linux/hmm.h> +#include <linux/pagemap.h> +#include <linux/sched/task.h> +#include 
<linux/sched/mm.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/swiotlb.h> +#include <linux/dma-buf.h> + #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_module.h> #include <drm/ttm/ttm_page_alloc.h> -#include <drm/drmP.h> + +#include <drm/drm_debugfs.h> #include <drm/amdgpu_drm.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/swiotlb.h> -#include <linux/swap.h> -#include <linux/pagemap.h> -#include <linux/debugfs.h> -#include <linux/iommu.h> + #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" #include "bif/bif_4_1_d.h" -#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) - static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, @@ -223,7 +230,7 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; - return drm_vma_node_verify_access(&abo->gem_base.vma_node, + return drm_vma_node_verify_access(&abo->tbo.base.vma_node, filp->private_data); } @@ -299,7 +306,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, uint64_t size, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **f) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -387,6 +394,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, src->mem); src_node_size = (src_mm->size << PAGE_SHIFT); + src_page_offset = 0; } else { src_node_start += cur_size; src_page_offset = src_node_start & (PAGE_SIZE - 1); @@ -396,6 +404,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, dst->mem); dst_node_size = (dst_mm->size << PAGE_SHIFT); + dst_page_offset = 0; } else { dst_node_start += cur_size; dst_page_offset = dst_node_start & (PAGE_SIZE - 1); @@ -434,10 +443,26 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->num_pages << PAGE_SHIFT, - bo->resv, &fence); + bo->base.resv, &fence); if (r) goto error; + /* clear the space being freed */ + if (old_mem->mem_type == TTM_PL_VRAM && + (ttm_to_amdgpu_bo(bo)->flags & + AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + struct dma_fence *wipe_fence = NULL; + + r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, + NULL, &wipe_fence); + if (r) { + goto error; + } else if (wipe_fence) { + dma_fence_put(fence); + fence = wipe_fence; + } + } + /* Always block for VM page tables before committing the new location */ if (bo->type == ttm_bo_type_kernel) r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); @@ -462,15 +487,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_place placements; struct ttm_placement placement; int r; - adev = amdgpu_ttm_adev(bo->bdev); - /* create space/pages for new_mem in GTT space */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -483,6 +505,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, placements.flags = TTM_PL_MASK_CACHING | 
TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { + pr_err("Failed to find GTT space for blit from VRAM\n"); return r; } @@ -520,15 +543,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; struct ttm_place placements; int r; - adev = amdgpu_ttm_adev(bo->bdev); - /* make space in GTT for old_mem buffer */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -541,6 +561,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { + pr_err("Failed to find GTT space for blit to VRAM\n"); return r; } @@ -561,6 +582,30 @@ out_cleanup: } /** + * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_mem_visible(struct amdgpu_device *adev, + struct ttm_mem_reg *mem) +{ + struct drm_mm_node *nodes = mem->mm_node; + + if (mem->mem_type == TTM_PL_SYSTEM || + mem->mem_type == TTM_PL_TT) + return true; + if (mem->mem_type != TTM_PL_VRAM) + return false; + + /* ttm_mem_reg_ioremap only supports contiguous memory */ + if (nodes->size != mem->num_pages) + return false; + + return ((nodes->start + nodes->size) << PAGE_SHIFT) + <= adev->gmc.visible_vram_size; +} + +/** * amdgpu_bo_move - Move a buffer object to a new memory location * * Called by ttm_bo_handle_move_mem() @@ -604,8 +649,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return 0; } - if (!adev->mman.buffer_funcs_enabled) + if (!adev->mman.buffer_funcs_enabled) { + r = -ENODEV; goto memcpy; + } if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { @@ -620,10 +667,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, ctx, new_mem); - if (r) { + /* Check that all memory is CPU accessible */ + if (!amdgpu_mem_visible(adev, old_mem) || + !amdgpu_mem_visible(adev, new_mem)) { + pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } + + r = ttm_bo_move_memcpy(bo, ctx, new_mem); + if (r) + return r; } if (bo->type == ttm_bo_type_device && @@ -705,143 +758,193 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. 
*/ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; + struct drm_gem_object *gobj; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + struct hmm_range *range; +#endif +}; + +#ifdef CONFIG_DRM_AMDGPU_USERPTR +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { + struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long start = gtt->userptr; + struct vm_area_struct *vma; + struct hmm_range *range; + unsigned long timeout; + struct mm_struct *mm; + unsigned long i; + int r = 0; - if (!mm) /* Happens during process shutdown */ - return -ESRCH; + mm = bo->notifier.mm; + if (unlikely(!mm)) { + DRM_DEBUG_DRIVER("BO is not registered?\n"); + return -EFAULT; + } - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; + /* Another get_user_pages is running at the same time?? 
*/ + if (WARN_ON(gtt->range)) + return -EFAULT; - down_read(&mm->mmap_sem); + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ + return -ESRCH; - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (unlikely(!range)) { + r = -ENOMEM; + goto out; + } + range->notifier = &bo->notifier; + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->start = bo->notifier.interval_tree.start; + range->end = bo->notifier.interval_tree.last + 1; + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!amdgpu_ttm_tt_is_readonly(ttm)) + range->default_flags |= range->flags[HMM_PFN_WRITE]; - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; - } + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), + GFP_KERNEL); + if (unlikely(!range->pfns)) { + r = -ENOMEM; + goto out_free_ranges; } - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; - - guptask.task = current; - spin_lock(>t->guptasklock); - list_add(&guptask.list, >t->guptasks); - spin_unlock(>t->guptasklock); + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out_unlock; + } + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vma->vm_file)) { + r = -EPERM; + goto out_unlock; + } + up_read(&mm->mmap_sem); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); +retry: + range->notifier_seq = mmu_interval_read_begin(&bo->notifier); - spin_lock(>t->guptasklock); - list_del(&guptask.list); - spin_unlock(>t->guptasklock); + down_read(&mm->mmap_sem); + r = hmm_range_fault(range, 0); + up_read(&mm->mmap_sem); + if (unlikely(r <= 0)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } - if (r < 0) - goto release_pages; + for (i = 0; i < ttm->num_pages; i++) { + /* FIXME: The pages cannot be touched outside the notifier_lock */ + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); + if (unlikely(!pages[i])) { + pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", + i, range->pfns[i]); + r = -ENOMEM; - pinned += r; + goto out_free_pfns; + } + } - } while (pinned < ttm->num_pages); + gtt->range = range; + mmput(mm); - up_read(&mm->mmap_sem); return 0; -release_pages: - release_pages(pages, pinned); +out_unlock: up_read(&mm->mmap_sem); +out_free_pfns: + kvfree(range->pfns); +out_free_ranges: + kfree(range); +out: + mmput(mm); return r; } /** - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated * - * Called by amdgpu_cs_list_validate(). 
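The kernel-doc above requires the caller to stop HMM tracking exactly once after a successful amdgpu_ttm_tt_get_user_pages(); that is done with amdgpu_ttm_tt_get_user_pages_done(), defined just below, which also reports whether the CPU page table changed in the meantime. A minimal sketch of that calling pattern, with an invented wrapper name and simplified error handling, not taken from the driver:

/* Sketch only: the get/done pairing required by the kernel-doc above. */
static int example_pin_userptr_pages(struct amdgpu_bo *bo, struct page **pages)
{
	int r;

	/* Fault in the pages and start HMM tracking of the CPU page table. */
	r = amdgpu_ttm_tt_get_user_pages(bo, pages);
	if (r)
		return r;

	/* ... map @pages for DMA / bind them into the GART here ... */

	/*
	 * Stop tracking.  Returns true only if no invalidation hit the
	 * range in the meantime; otherwise the pages are stale and the
	 * whole sequence has to be retried.
	 */
	if (!amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm))
		return -EAGAIN;

	return 0;
}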
This creates the page list - * that backs user memory and will ultimately be mapped into the device - * address space. + * Returns: true if pages are still valid */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; + bool r = false; - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); + if (!gtt || !gtt->userptr) + return false; - ttm->pages[i] = pages ? pages[i] : NULL; + DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", + gtt->userptr, ttm->num_pages); + + WARN_ONCE(!gtt->range || !gtt->range->pfns, + "No user pages to check\n"); + + if (gtt->range) { + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + r = mmu_interval_read_retry(gtt->range->notifier, + gtt->range->notifier_seq); + kvfree(gtt->range->pfns); + kfree(gtt->range); + gtt->range = NULL; } + + return !r; } +#endif /** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * - * Called while unpinning userptr pages + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - struct page *page = ttm->pages[i]; + unsigned long i; - if (!page) - continue; - - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } + for (i = 0; i < ttm->num_pages; ++i) + ttm->pages[i] = pages ? 
pages[i] : NULL; } /** @@ -903,10 +1006,22 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + if (gtt->range) { + unsigned long i; + + for (i = 0; i < ttm->num_pages; i++) { + if (ttm->pages[i] != + hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[i])) + break; + } + + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); + } +#endif } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -927,8 +1042,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, goto gart_bind_fail; /* Patch mtype of the second part BO */ - flags &= ~AMDGPU_PTE_MTYPE_MASK; - flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); r = amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), @@ -1132,16 +1247,14 @@ static struct ttm_backend_func amdgpu_backend_func = { static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { - struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; - adev = amdgpu_ttm_adev(bo->bdev); - gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; + gtt->gobj = &bo->base; /* allocate space for the uninitialized page entries */ if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { @@ -1162,7 +1275,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt && gtt->userptr) { @@ -1175,7 +1287,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, return 0; } - if (slave && ttm->sg) { + if (ttm->page_flags & TTM_PAGE_FLAG_SG) { + if (!ttm->sg) { + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + attach = gtt->gobj->import_attach; + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + ttm->sg = sgt; + } + drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -1202,9 +1326,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, */ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) { - struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + struct amdgpu_device *adev; if (gtt && gtt->userptr) { amdgpu_ttm_tt_set_user_pages(ttm, NULL); @@ -1213,7 +1336,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) return; } - if (slave) + if (ttm->sg && gtt->gobj->import_attach) { + struct dma_buf_attachment *attach; + + attach = gtt->gobj->import_attach; + dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); + ttm->sg = NULL; + return; + } + + if (ttm->page_flags & TTM_PAGE_FLAG_SG) return; adev = amdgpu_ttm_adev(ttm->bdev); @@ -1256,11 +1388,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); - spin_lock_init(>t->guptasklock); - INIT_LIST_HEAD(>t->guptasks); - atomic_set(>t->mmu_invalidations, 0); - gtt->last_set_pages = 0; - return 0; } @@ -1289,7 +1416,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long 
start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1302,48 +1428,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(>t->guptasklock); - list_for_each_entry(entry, >t->guptasks, list) { - if (entry->task == current) { - spin_unlock(>t->guptasklock); - return false; - } - } - spin_unlock(>t->guptasklock); - - atomic_inc(>t->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(>t->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? - */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; + return true; } /** @@ -1420,19 +1518,26 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; - struct reservation_object_list *flist; + struct dma_resv_list *flist; struct dma_fence *f; int i; + /* Don't evict VM page tables while they are busy, otherwise we can't + * cleanly handle page faults. + */ + if (bo->type == ttm_bo_type_kernel && + !dma_resv_test_signaled_rcu(bo->base.resv, true)) + return false; + /* If bo is a KFD BO, check if the bo belongs to the current process. 
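As a worked example of the interval test in amdgpu_ttm_tt_affect_userptr() above: for a userptr mapping at 0x10000 with size 0x4000 (covering 0x10000..0x13fff), an invalidation of 0x12000..0x13000 fails both early-out checks (0x10000 > 0x13000 is false, and 0x10000 + 0x4000 <= 0x12000 is false), so the range is reported as affected. An invalidation of 0x20000..0x21000 satisfies the second check (0x14000 <= 0x20000), so it is reported as not touching this ttm_tt.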
* If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = reservation_object_get_list(bo->resv); + flist = dma_resv_get_list(bo->base.resv); if (flist) { for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(bo->resv)); + dma_resv_held(bo->base.resv)); if (amdkfd_fence_check_mm(f, current->mm)) return false; } @@ -1542,6 +1647,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .move = &amdgpu_bo_move, .verify_access = &amdgpu_verify_access, .move_notify = &amdgpu_bo_move_notify, + .release_notify = &amdgpu_bo_release_notify, .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, @@ -1575,81 +1681,105 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) */ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_param bp; - int r = 0; - int i; - u64 vram_size = adev->gmc.visible_vram_size; - u64 offset = adev->fw_vram_usage.start_offset; - u64 size = adev->fw_vram_usage.size; - struct amdgpu_bo *bo; - - memset(&bp, 0, sizeof(bp)); - bp.size = adev->fw_vram_usage.size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; + uint64_t vram_size = adev->gmc.visible_vram_size; + adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; - if (adev->fw_vram_usage.size > 0 && - adev->fw_vram_usage.size <= vram_size) { + if (adev->fw_vram_usage.size == 0 || + adev->fw_vram_usage.size > vram_size) + return 0; - r = amdgpu_bo_create(adev, &bp, - &adev->fw_vram_usage.reserved_bo); - if (r) - goto error_create; + return amdgpu_bo_create_kernel_at(adev, + adev->fw_vram_usage.start_offset, + adev->fw_vram_usage.size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); +} - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); - if (r) - goto error_reserve; +/* + * Memoy training reservation functions + */ - /* remove the original mem node and create a new one at the - * request position - */ - bo = adev->fw_vram_usage.reserved_bo; - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } +/** + * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram + * + * @adev: amdgpu_device pointer + * + * free memory training reserved vram if it has been reserved. 
+ */ +static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) +{ + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, - &bo->tbo.mem, &ctx); - if (r) - goto error_pin; + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; + amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); + ctx->c2p_bo = NULL; - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, - AMDGPU_GEM_DOMAIN_VRAM, - adev->fw_vram_usage.start_offset, - (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size)); - if (r) - goto error_pin; - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); - if (r) - goto error_kmap; + amdgpu_bo_free_kernel(&ctx->p2c_bo, NULL, NULL); + ctx->p2c_bo = NULL; + + return 0; +} + +/** + * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from memory training. + */ +static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) +{ + int ret; + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + memset(ctx, 0, sizeof(*ctx)); + if (!adev->fw_vram_usage.mem_train_support) { + DRM_DEBUG("memory training does not support!\n"); + return 0; } - return r; -error_kmap: - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); -error_pin: - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); -error_reserve: - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); -error_create: - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - return r; + ctx->c2p_train_data_offset = adev->fw_vram_usage.mem_train_fb_loc; + ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); + ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); + + ret = amdgpu_bo_create_kernel_at(adev, + ctx->p2c_train_data_offset, + ctx->train_data_size, + AMDGPU_GEM_DOMAIN_VRAM, + &ctx->p2c_bo, + NULL); + if (ret) { + DRM_ERROR("alloc p2c_bo failed(%d)!\n", ret); + goto Err_out; + } + + ret = amdgpu_bo_create_kernel_at(adev, + ctx->c2p_train_data_offset, + ctx->train_data_size, + AMDGPU_GEM_DOMAIN_VRAM, + &ctx->c2p_bo, + NULL); + if (ret) { + DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + goto Err_out; + } + + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; + return 0; + +Err_out: + amdgpu_ttm_training_reserve_vram_fini(adev); + return ret; } + /** * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. 
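Both the firmware-reserved region and the training buffers above funnel through amdgpu_bo_create_kernel_at(), which pins a kernel BO at a fixed byte offset in VRAM and can hand back a CPU mapping. A minimal sketch of that pattern, with a made-up offset and size (the real callers pass the fw_vram_usage window, the c2p/p2c training offsets, or the discovery TMR location):

/* Sketch: reserve a hypothetical 1 MiB region at VRAM offset 16 MiB. */
static int example_reserve_fixed_vram(struct amdgpu_device *adev,
				      struct amdgpu_bo **bo, void **cpu_addr)
{
	const u64 offset = 16ULL << 20;		/* byte offset into VRAM */
	const u64 size   = 1ULL << 20;		/* region size in bytes */

	/* cpu_addr may be NULL when no CPU mapping is needed (c2p/p2c bos). */
	return amdgpu_bo_create_kernel_at(adev, offset, size,
					  AMDGPU_GEM_DOMAIN_VRAM,
					  bo, cpu_addr);
}

Teardown goes through amdgpu_bo_free_kernel(), as in amdgpu_ttm_training_reserve_vram_fini() above.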
@@ -1664,6 +1794,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) uint64_t gtt_size; int r; u64 vis_vram_limit; + void *stolen_vga_buf; mutex_init(&adev->mman.gtt_window_lock); @@ -1671,8 +1802,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, - DRM_FILE_PAGE_OFFSET, - adev->need_dma32); + adev->ddev->vma_offset_manager, + dma_addressing_limited(adev->dev)); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; @@ -1712,6 +1843,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + /* + *The reserved vram for memory training must be pinned to the specified + *place on the VRAM, so reserve it early. + */ + r = amdgpu_ttm_training_reserve_vram_init(adev); + if (r) + return r; + /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS @@ -1719,9 +1858,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, - NULL, NULL); + NULL, &stolen_vga_buf); + if (r) + return r; + + /* + * reserve one TMR (64K) memory at the top of VRAM which holds + * IP Discovery data and is protected by PSP. + */ + r = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE, + DISCOVERY_TMR_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->discovery_memory, + NULL); if (r) return r; + DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1749,44 +1902,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize various on-chip memory pools */ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size); + adev->gds.gds_size); if (r) { DRM_ERROR("Failed initializing GDS heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - 4, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size); + adev->gds.gws_size); if (r) { DRM_ERROR("Failed initializing gws heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size); + adev->gds.oa_size); if (r) { DRM_ERROR("Failed initializing oa heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { @@ -1801,8 +1936,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) */ void amdgpu_ttm_late_init(struct amdgpu_device *adev) { + void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); } /** @@ -1814,7 +1950,11 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); + amdgpu_ttm_training_reserve_vram_fini(adev); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) 
iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; @@ -1877,14 +2017,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) { - struct drm_file *file_priv; - struct amdgpu_device *adev; - - if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) - return -EINVAL; + struct drm_file *file_priv = filp->private_data; + struct amdgpu_device *adev = file_priv->minor->dev->dev_private; - file_priv = filp->private_data; - adev = file_priv->minor->dev->dev_private; if (adev == NULL) return -EINVAL; @@ -1915,10 +2050,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; - num_dw = adev->mman.buffer_funcs->copy_num_dw; - while (num_dw & 0x7) - num_dw++; - + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); @@ -1959,7 +2091,7 @@ error_free: int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush) { @@ -1978,11 +2110,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); - num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; - - /* for IB padding */ - while (num_dw & 0x7) - num_dw++; + num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); if (r) @@ -2033,7 +2161,7 @@ error_free: int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -2062,9 +2190,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, mm_node = bo->tbo.mem.mm_node; num_loops = 0; while (num_pages) { - uint32_t byte_count = mm_node->size << PAGE_SHIFT; + uint64_t byte_count = mm_node->size << PAGE_SHIFT; - num_loops += DIV_ROUND_UP(byte_count, max_bytes); + num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes); num_pages -= mm_node->size; ++mm_node; } @@ -2090,12 +2218,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, mm_node = bo->tbo.mem.mm_node; while (num_pages) { - uint32_t byte_count = mm_node->size << PAGE_SHIFT; + uint64_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { - uint32_t cur_size_in_bytes = min(byte_count, max_bytes); + uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count, + max_bytes); amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, cur_size_in_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..0dddedc06ae3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,6 +38,8 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 +#define AMDGPU_POISON 0xd0bed0be + struct amdgpu_mman { struct ttm_bo_device bdev; bool mem_global_referenced; @@ -83,27 +85,40 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - struct 
reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, uint64_t size, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, - struct reservation_object *resv, + struct dma_resv *resv, struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); -int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +#else +static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, + struct page **pages) +{ + return -EPERM; +} +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + return false; +} +#endif + void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); @@ -112,7 +127,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 7b33867036e7..833fc4b68940 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" @@ -77,6 +77,14 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr) container_of(hdr, struct smc_firmware_header_v1_0, header); DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(smc_hdr->ucode_start_addr)); + } else if (version_major == 2) { + const struct smc_firmware_header_v1_0 *v1_hdr = + container_of(hdr, struct smc_firmware_header_v1_0, header); + const struct smc_firmware_header_v2_0 *v2_hdr = + container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); + + DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); + DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); } else { DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); } @@ -227,6 +235,56 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) } } +void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) +{ + uint16_t version_major = le16_to_cpu(hdr->header_version_major); + uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + + DRM_DEBUG("PSP\n"); + amdgpu_ucode_print_common_hdr(hdr); + + if (version_major == 1) { + const struct psp_firmware_header_v1_0 *psp_hdr = + container_of(hdr, 
struct psp_firmware_header_v1_0, header); + + DRM_DEBUG("ucode_feature_version: %u\n", + le32_to_cpu(psp_hdr->ucode_feature_version)); + DRM_DEBUG("sos_offset_bytes: %u\n", + le32_to_cpu(psp_hdr->sos_offset_bytes)); + DRM_DEBUG("sos_size_bytes: %u\n", + le32_to_cpu(psp_hdr->sos_size_bytes)); + if (version_minor == 1) { + const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 = + container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0); + DRM_DEBUG("toc_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_1->toc_header_version)); + DRM_DEBUG("toc_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->toc_offset_bytes)); + DRM_DEBUG("toc_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->toc_size_bytes)); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_1->kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); + } + if (version_minor == 2) { + const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = + container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); + } + } else { + DRM_ERROR("Unknown PSP ucode version: %u.%u\n", + version_major, version_minor); + } +} + void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr) { uint16_t version_major = le16_to_cpu(hdr->header_version_major); @@ -302,10 +360,16 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_RENOIR: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + default: DRM_ERROR("Unknown firmware load type\n"); } @@ -313,6 +377,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } +#define FW_VERSION_ATTR(name, mode, field) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct drm_device *ddev = dev_get_drvdata(dev); \ + struct amdgpu_device *adev = ddev->dev_private; \ + \ + return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \ +} \ +static DEVICE_ATTR(name, mode, show_##name, NULL) + +FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version); +FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version); +FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version); +FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version); +FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version); +FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version); +FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version); +FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version); +FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); +FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); +FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); +FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); 
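For a single attribute, the FW_VERSION_ATTR() macro above expands to roughly the following (shown for vce_fw_version; the other attributes differ only in the name and the adev field that is printed):

static ssize_t show_vce_fw_version(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->vce.fw_version);
}
static DEVICE_ATTR(vce_fw_version, 0444, show_vce_fw_version, NULL);

Once amdgpu_ucode_sysfs_init() below registers the attribute group, the value can be read from the fw_version directory under the device's sysfs node.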
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); +FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); +FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); +FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); + +static struct attribute *fw_attrs[] = { + &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, + &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr, + &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr, + &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr, + &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr, + &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr, + &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr, + &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, + &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, + &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, + &dev_attr_dmcu_fw_version.attr, NULL +}; + +static const struct attribute_group fw_attr_group = { + .name = "fw_version", + .attrs = fw_attrs +}; + +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev) +{ + return sysfs_create_group(&adev->dev->kobj, &fw_attr_group); +} + +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev) +{ + sysfs_remove_group(&adev->dev->kobj, &fw_attr_group); +} + static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, struct amdgpu_firmware_info *ucode, uint64_t mc_addr, void *kptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 7ac25a1c7853..410587b950f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -23,6 +23,8 @@ #ifndef __AMDGPU_UCODE_H__ #define __AMDGPU_UCODE_H__ +#include "amdgpu_socbb.h" + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ @@ -49,6 +51,26 @@ struct smc_firmware_header_v1_0 { uint32_t ucode_start_addr; }; +/* version_major=2, version_minor=0 */ +struct smc_firmware_header_v2_0 { + struct smc_firmware_header_v1_0 v1_0; + uint32_t ppt_offset_bytes; /* soft pptable offset */ + uint32_t ppt_size_bytes; /* soft pptable size */ +}; + +struct smc_soft_pptable_entry { + uint32_t id; + uint32_t ppt_offset_bytes; + uint32_t ppt_size_bytes; +}; + +/* version_major=2, version_minor=1 */ +struct smc_firmware_header_v2_1 { + struct smc_firmware_header_v1_0 v1_0; + uint32_t pptable_count; + uint32_t pptable_entry_offset; +}; + /* version_major=1, version_minor=0 */ struct psp_firmware_header_v1_0 { struct common_firmware_header header; @@ -57,6 +79,26 @@ struct psp_firmware_header_v1_0 { uint32_t sos_size_bytes; }; +/* version_major=1, version_minor=1 */ +struct psp_firmware_header_v1_1 { + struct psp_firmware_header_v1_0 v1_0; + uint32_t toc_header_version; + uint32_t toc_offset_bytes; + uint32_t toc_size_bytes; + uint32_t kdb_header_version; + uint32_t kdb_offset_bytes; + uint32_t kdb_size_bytes; +}; + +/* version_major=1, version_minor=2 */ +struct psp_firmware_header_v1_2 { + struct psp_firmware_header_v1_0 v1_0; + uint32_t reserve[3]; + uint32_t kdb_header_version; + uint32_t kdb_offset_bytes; + uint32_t kdb_size_bytes; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -66,6 +108,12 @@ struct ta_firmware_header_v1_0 { uint32_t ta_ras_ucode_version; uint32_t ta_ras_offset_bytes; uint32_t 
ta_ras_size_bytes; + uint32_t ta_hdcp_ucode_version; + uint32_t ta_hdcp_offset_bytes; + uint32_t ta_hdcp_size_bytes; + uint32_t ta_dtm_ucode_version; + uint32_t ta_dtm_offset_bytes; + uint32_t ta_dtm_size_bytes; }; /* version_major=1, version_minor=0 */ @@ -77,6 +125,21 @@ struct gfx_firmware_header_v1_0 { }; /* version_major=1, version_minor=0 */ +struct mes_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t mes_ucode_version; + uint32_t mes_ucode_size_bytes; + uint32_t mes_ucode_offset_bytes; + uint32_t mes_ucode_data_version; + uint32_t mes_ucode_data_size_bytes; + uint32_t mes_ucode_data_offset_bytes; + uint32_t mes_uc_start_addr_lo; + uint32_t mes_uc_start_addr_hi; + uint32_t mes_data_start_addr_lo; + uint32_t mes_data_start_addr_hi; +}; + +/* version_major=1, version_minor=0 */ struct rlc_firmware_header_v1_0 { struct common_firmware_header header; uint32_t ucode_feature_version; @@ -161,6 +224,19 @@ struct gpu_info_firmware_v1_0 { uint32_t gc_lds_size; }; +struct gpu_info_firmware_v1_1 { + struct gpu_info_firmware_v1_0 v1_0; + uint32_t num_sc_per_sh; + uint32_t num_packer_per_sc; +}; + +/* gpu info payload + * version_major=1, version_minor=1 */ +struct gpu_info_firmware_v1_2 { + struct gpu_info_firmware_v1_1 v1_1; + struct gpu_info_soc_bounding_box_v1_0 soc_bounding_box; +}; + /* version_major=1, version_minor=0 */ struct gpu_info_firmware_header_v1_0 { struct common_firmware_header header; @@ -180,7 +256,9 @@ union amdgpu_firmware_header { struct common_firmware_header common; struct mc_firmware_header_v1_0 mc; struct smc_firmware_header_v1_0 smc; + struct smc_firmware_header_v2_0 smc_v2_0; struct psp_firmware_header_v1_0 psp; + struct psp_firmware_header_v1_1 psp_v1_1; struct ta_firmware_header_v1_0 ta; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; @@ -199,6 +277,12 @@ union amdgpu_firmware_header { enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SDMA0 = 0, AMDGPU_UCODE_ID_SDMA1, + AMDGPU_UCODE_ID_SDMA2, + AMDGPU_UCODE_ID_SDMA3, + AMDGPU_UCODE_ID_SDMA4, + AMDGPU_UCODE_ID_SDMA5, + AMDGPU_UCODE_ID_SDMA6, + AMDGPU_UCODE_ID_SDMA7, AMDGPU_UCODE_ID_CP_CE, AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, @@ -206,6 +290,8 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC1_JT, AMDGPU_UCODE_ID_CP_MEC2, AMDGPU_UCODE_ID_CP_MEC2_JT, + AMDGPU_UCODE_ID_CP_MES, + AMDGPU_UCODE_ID_CP_MES_DATA, AMDGPU_UCODE_ID_RLC_G, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, @@ -216,8 +302,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_UVD1, AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1, AMDGPU_UCODE_ID_DMCU_ERAM, AMDGPU_UCODE_ID_DMCU_INTV, + AMDGPU_UCODE_ID_VCN0_RAM, + AMDGPU_UCODE_ID_VCN1_RAM, AMDGPU_UCODE_ID_MAXIMUM, }; @@ -232,6 +321,7 @@ enum amdgpu_firmware_load_type { AMDGPU_FW_LOAD_DIRECT = 0, AMDGPU_FW_LOAD_SMU, AMDGPU_FW_LOAD_PSP, + AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO, }; /* conform to smu_ucode_xfer_cz.h */ @@ -284,6 +374,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); +void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); int amdgpu_ucode_validate(const struct firmware *fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, @@ -291,7 +382,9 @@ 
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, int amdgpu_ucode_init_bo(struct amdgpu_device *adev); int amdgpu_ucode_create_bo(struct amdgpu_device *adev); +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev); void amdgpu_ucode_free_bo(struct amdgpu_device *adev); +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev); enum amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c new file mode 100644 index 000000000000..d4fb9cf27e21 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -0,0 +1,158 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu_ras.h" + +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_umc_process_ras_data_cb, + }; + + if (!adev->umc.ras_if) { + adev->umc.ras_if = + kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->umc.ras_if) + return -ENOMEM; + adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if->sub_block_index = 0; + strcpy(adev->umc.ras_if->name, "umc"); + } + ih_info.head = fs_info.head = *adev->umc.ras_if; + + r = amdgpu_ras_late_init(adev, adev->umc.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + /* ras init of specific umc version */ + if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) + adev->umc.funcs->err_cnt_init(adev); + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); +free: + kfree(adev->umc.ras_if); + adev->umc.ras_if = NULL; + return r; +} + +void amdgpu_umc_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && + adev->umc.ras_if) { + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_umc_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return AMDGPU_RAS_SUCCESS; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + DRM_WARN("Failed to alloc memory for umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.funcs->query_ras_error_address(adev, ras_error_status); + } + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) { + if (err_data->err_addr_cnt && + amdgpu_ras_add_bad_pages(adev, err_data->err_addr, + err_data->err_addr_cnt)) + DRM_WARN("Failed to add ras bad page!\n"); + + amdgpu_ras_reset_gpu(adev, 0); + } + + kfree(err_data->err_addr); + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, 
&ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h new file mode 100644 index 000000000000..3283032a78e5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_UMC_H__ +#define __AMDGPU_UMC_H__ + +/* implement 64 bits REG operations via 32 bits interface */ +#define RREG64_UMC(reg) (RREG32(reg) | \ + ((uint64_t)RREG32((reg) + 1) << 32)) +#define WREG64_UMC(reg, v) \ + do { \ + WREG32((reg), lower_32_bits(v)); \ + WREG32((reg) + 1, upper_32_bits(v)); \ + } while (0) + +/* + * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data, + * uint32_t umc_reg_offset, uint32_t channel_index) + */ +#define amdgpu_umc_for_each_channel(func) \ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \ + uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \ + for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \ + /* enable the index mode to query eror count per channel */ \ + adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \ + for (channel_inst = 0; \ + channel_inst < adev->umc.channel_inst_num; \ + channel_inst++) { \ + /* calc the register offset according to channel instance */ \ + umc_reg_offset = adev->umc.channel_offs * channel_inst; \ + /* get channel index of interleaved memory */ \ + channel_index = adev->umc.channel_idx_tbl[ \ + umc_inst * adev->umc.channel_inst_num + channel_inst]; \ + (func)(adev, err_data, umc_reg_offset, channel_index); \ + } \ + } \ + adev->umc.funcs->disable_umc_index_mode(adev); + +struct amdgpu_umc_funcs { + void (*err_cnt_init)(struct amdgpu_device *adev); + int (*ras_late_init)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); + void (*enable_umc_index_mode)(struct amdgpu_device *adev, + uint32_t umc_instance); + void (*disable_umc_index_mode)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); +}; + +struct amdgpu_umc { + /* max error count in one ras query call */ + uint32_t max_ras_err_cnt_per_query; + /* number of umc channel instance with memory map register access */ + uint32_t channel_inst_num; + /* number of umc instance with memory map register access */ + uint32_t umc_inst_num; 
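The amdgpu_umc_for_each_channel() macro above is meant to be expanded inside a query function whose scope provides adev and ras_error_status; it declares err_data itself and invokes a per-channel helper with the signature documented in its comment. A hypothetical ASIC-specific query wired into it might look like this sketch (names invented; the actual register reads of an implementation such as umc_v6_1.c are omitted):

/* Sketch: per-channel callback with the signature documented above. */
static void example_umc_query_channel(struct amdgpu_device *adev,
				      struct ras_err_data *err_data,
				      uint32_t umc_reg_offset,
				      uint32_t channel_index)
{
	/*
	 * Read the per-channel ECC counters at umc_reg_offset here and
	 * fold them into err_data (err_data->ue_count is what
	 * amdgpu_umc_process_ras_data_cb() above checks to decide on a
	 * GPU reset).
	 */
}

static void example_umc_query_ras_error_count(struct amdgpu_device *adev,
					      void *ras_error_status)
{
	amdgpu_umc_for_each_channel(example_umc_query_channel);
}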
+ /* UMC regiser per channel offset */ + uint32_t channel_offs; + /* channel index table of interleaved memory */ + const uint32_t *channel_idx_tbl; + struct ras_common_if *ras_if; + + const struct amdgpu_umc_funcs *funcs; +}; + +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_umc_ras_fini(struct amdgpu_device *adev); +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 4e5d13e41f6a..e324bfe6c58f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -30,7 +30,7 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <drm/drmP.h> + #include <drm/drm.h> #include "amdgpu.h" @@ -39,6 +39,8 @@ #include "cikd.h" #include "uvd/uvd_4_2_d.h" +#include "amdgpu_ras.h" + /* 1 second timeout */ #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -297,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { @@ -372,7 +375,13 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (!adev->uvd.inst[j].saved_bo) return -ENOMEM; - memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + /* re-write 0 since err_event_athub will corrupt VCPU buffer */ + if (amdgpu_ras_intr_triggered()) { + DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + memset(adev->uvd.inst[j].saved_bo, 0, size); + } else { + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + } } return 0; } @@ -1073,7 +1082,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, msecs_to_jiffies(10)); if (r == 0) @@ -1085,7 +1094,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; } else { - r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv, AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) goto err_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c021b114c8a4..46b590af2fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -27,7 +27,7 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <drm/drmP.h> + #include <drm/drm.h> #include "amdgpu.h" @@ -80,6 +80,11 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12); MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_vce_idle_work_handler(struct work_struct *work); +static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, + struct dma_fence **fence); +static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence); /** * amdgpu_vce_init - allocate memory, load vce firmware @@ -211,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, @@ 
-428,14 +434,15 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * * Open up a stream for HW test */ -int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) +static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -444,7 +451,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); /* stitch together an VCE create msg */ ib->length_dw = 0; @@ -476,8 +483,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000001; for (i = ib->length_dw; i < ib_size_dw; ++i) @@ -507,8 +514,8 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; @@ -1072,7 +1079,7 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r, timeout = adev->usec_timeout; @@ -1084,13 +1091,15 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) if (r) return r; + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, VCE_CMD_END); amdgpu_ring_commit(ring); for (i = 0; i < timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) break; - DRM_UDELAY(1); + udelay(1); } if (i >= timeout) @@ -1108,13 +1117,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; /* skip vce ring1/2 ib test for now, since it's not reliable */ if (ring != &ring->adev->vce.ring[0]) return 0; - r = amdgpu_vce_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL); if (r) goto error; @@ -1130,5 +1146,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 30ea54dd9117..d6d83a3ec803 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -58,10 +58,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); -int 
amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence); -int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ecf6f96df2ad..9d870444d7d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -26,7 +26,8 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include <drm/drm.h> #include "amdgpu.h" @@ -45,10 +46,20 @@ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" +#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" +#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin" +#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); MODULE_FIRMWARE(FIRMWARE_RAVEN2); +MODULE_FIRMWARE(FIRMWARE_ARCTURUS); +MODULE_FIRMWARE(FIRMWARE_RENOIR); +MODULE_FIRMWARE(FIRMWARE_NAVI10); +MODULE_FIRMWARE(FIRMWARE_NAVI14); +MODULE_FIRMWARE(FIRMWARE_NAVI12); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -58,7 +69,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; - int r; + int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -71,6 +82,33 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) else fw_name = FIRMWARE_RAVEN; break; + case CHIP_ARCTURUS: + fw_name = FIRMWARE_ARCTURUS; + break; + case CHIP_RENOIR: + fw_name = FIRMWARE_RENOIR; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVI10: + fw_name = FIRMWARE_NAVI10; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVI14: + fw_name = FIRMWARE_NAVI14; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVI12: + fw_name = FIRMWARE_NAVI12; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -124,12 +162,28 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); - return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, + 
&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } + } + + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, + &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); + return r; + } } return 0; @@ -137,20 +191,32 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { - int i; + int i, j; + + cancel_delayed_work_sync(&adev->vcn.idle_work); - kvfree(adev->vcn.saved_bo); + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, + &adev->vcn.dpg_sram_gpu_addr, + (void **)&adev->vcn.dpg_sram_cpu_addr); + } - amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, - (void **)&adev->vcn.cpu_addr); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + kvfree(adev->vcn.inst[j].saved_bo); - amdgpu_ring_fini(&adev->vcn.ring_dec); + amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, + &adev->vcn.inst[j].gpu_addr, + (void **)&adev->vcn.inst[j].cpu_addr); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); - amdgpu_ring_fini(&adev->vcn.ring_jpeg); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + + amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); + } release_firmware(adev->vcn.fw); @@ -161,21 +227,25 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.vcpu_bo == NULL) - return 0; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.saved_bo) - return -ENOMEM; - - memcpy_fromio(adev->vcn.saved_bo, ptr, size); + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + } return 0; } @@ -183,158 +253,36 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->vcn.vcpu_bo == NULL) - return -EINVAL; - - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; - if (adev->vcn.saved_bo != NULL) { - memcpy_toio(ptr, adev->vcn.saved_bo, size); - kvfree(adev->vcn.saved_bo); - adev->vcn.saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); - } - memset_io(ptr, 0, size); - } + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = 
adev->vcn.inst[i].cpu_addr; - return 0; -} - -static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) -{ - int ret_code; - uint32_t reg_data = 0; - uint32_t reg_data2 = 0; - struct amdgpu_ring *ring; - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); - - if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* pause DPG non-jpeg */ - reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } + if (adev->vcn.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; } else { - /* unpause dpg non-jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.fw_based = new_state->fw_based; - } - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.jpeg != new_state->jpeg) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); - - if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* Make sure JPRG Snoop is disabled before sending the pause */ - reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); - reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; - WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); - - /* pause DPG jpeg */ - reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - 
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_jpeg; - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); } - } else { - /* unpause dpg jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + memset_io(ptr, 0, size); } - adev->vcn.pause_state.jpeg = new_state->jpeg; } - return 0; } @@ -342,35 +290,40 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned int fences = 0; - unsigned int i; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); - } + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - struct dpg_pause_state new_state; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; - if (fences) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + if (fence[j]) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); - } + adev->vcn.pause_dpg_mode(adev, &new_state); + } - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); + fences += fence[j]; + } if (fences == 0) { 
amdgpu_gfx_off_ctrl(adev, true); - if (adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -387,7 +340,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); - if (adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -400,14 +353,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); } if (fences) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) + if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -417,7 +370,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } } @@ -433,20 +386,18 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; - - amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -472,14 +423,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, ib = &job->ibs[0]; addr = amdgpu_bo_gpu_offset(bo); - ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); + ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0); ib->ptr[1] = addr; - ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); + ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0); ib->ptr[3] = addr >> 32; - ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); + ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0); ib->ptr[5] = 0; for (i = 6; i < 16; i += 2) { - ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); + ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0); ib->ptr[i+1] = 0; } ib->length_dw = 16; @@ -594,7 +545,7 @@ error: int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r; @@ -602,13 +553,15 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) if (r) return r; + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, VCN_ENC_CMD_END); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -618,13 +571,14 @@ int 
amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) } static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -632,14 +586,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -670,13 +624,14 @@ err: } static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -684,14 +639,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -724,13 +679,20 @@ err: int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -742,6 +704,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -752,22 +716,20 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) return r; - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -792,7 
+754,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -838,10 +800,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) } for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a0ad19af9080..dface275c81a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -25,11 +25,17 @@ #define __AMDGPU_VCN_H__ #define AMDGPU_VCN_STACK_SIZE (128*1024) -#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) +#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 +#define AMDGPU_MAX_VCN_INSTANCES 2 + +#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) +#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) + +#define VCN_DEC_KMD_CMD 0x80000000 #define VCN_DEC_CMD_FENCE 0x00000000 #define VCN_DEC_CMD_TRAP 0x00000001 #define VCN_DEC_CMD_WRITE_REG 0x00000004 @@ -45,8 +51,81 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 +#define VCN_AON_SOC_ADDRESS_2_0 0x1f800 +#define VCN_VID_IP_ADDRESS_2_0 0x0 +#define VCN_AON_IP_ADDRESS_2_0 0x30000 + +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + +#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \ + ({ \ + uint32_t internal_reg_offset, addr; \ + bool video_range, aon_range; \ + \ + addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + addr <<= 2; \ + video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ + ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ + aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \ + ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \ + if (video_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \ + (VCN_VID_IP_ADDRESS_2_0)); \ + else if (aon_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \ + (VCN_AON_IP_ADDRESS_2_0)); \ + else \ + internal_reg_offset = (0xFFFFF & addr); \ + \ + internal_reg_offset >>= 2; \ + }) + +#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, 
\ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.dpg_sram_curr_addr++ = offset; \ + *adev->vcn.dpg_sram_curr_addr++ = value; \ + } \ + } while (0) + enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, UVD_STATUS__UVD_BUSY = 0x00000004, GB_ADDR_CONFIG_DEFAULT = 0x26010011, @@ -54,6 +133,7 @@ enum engine_status_constants { UVD_STATUS__BUSY = 0x5, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1, UVD_STATUS__RBC_BUSY = 0x1, + UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0, }; enum internal_dpg_state { @@ -66,21 +146,54 @@ struct dpg_pause_state { enum internal_dpg_state jpeg; }; -struct amdgpu_vcn { +struct amdgpu_vcn_reg{ + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned nop; + unsigned context_id; + unsigned ib_vmid; + unsigned ib_bar_low; + unsigned ib_bar_high; + unsigned ib_size; + unsigned gp_scratch8; + unsigned scratch9; + unsigned jpeg_pitch; +}; + +struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; + struct amdgpu_vcn_reg external; +}; + +struct amdgpu_vcn { + unsigned fw_version; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; + + bool indirect_sram; + struct amdgpu_bo *dpg_sram_bo; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; + + uint8_t num_vcn_inst; + struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + struct amdgpu_vcn_reg internal; + + unsigned harvest_config; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 462a04e0f5e6..e32ae906d797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -21,6 +21,10 @@ * */ +#include <linux/module.h> + +#include <drm/drm_drv.h> + #include "amdgpu.h" bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) @@ -36,6 +40,7 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; + adev->ddev->driver->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; } @@ -375,4 +380,53 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) } } +static uint32_t parse_clk(char *buf, bool min) +{ + char *ptr = buf; + uint32_t clk = 0; + + do { + ptr = strchr(ptr, ':'); + if (!ptr) + break; + ptr+=2; + if (kstrtou32(ptr, 10, &clk)) + return 0; + } while (!min); + 
+ return clk * 100; +} + +uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest) +{ + char *buf = NULL; + uint32_t clk = 0; + + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); + clk = parse_clk(buf, lowest); + + kfree(buf); + return clk; +} + +uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) +{ + char *buf = NULL; + uint32_t clk = 0; + + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); + clk = parse_clk(buf, lowest); + + kfree(buf); + + return clk; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 722deefc0a7e..b0b2bdc750df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -57,6 +57,8 @@ struct amdgpu_virt_ops { int (*reset_gpu)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); + int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); + int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); }; /* @@ -83,6 +85,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, /* VRAM LOST by GIM */ AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, + /* HW PERF SIM in GIM */ + AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3), }; struct amd_sriov_msg_pf2vf_info_header { @@ -242,6 +246,7 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; struct amdgpu_virt { uint32_t caps; struct amdgpu_bo *csa_obj; + void *csa_cpu_addr; bool chained_ib_support; uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; @@ -252,6 +257,9 @@ struct amdgpu_virt { struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; uint32_t gim_feature; + /* protect DPM events to GIM */ + struct mutex dpm_mutex; + uint32_t reg_access_mode; }; #define amdgpu_sriov_enabled(adev) \ @@ -278,6 +286,9 @@ static inline bool is_virtual_machine(void) #endif } +#define amdgim_is_hwperf(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION) + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); @@ -295,5 +306,6 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, unsigned int key, unsigned int chksum); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); - +uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); +uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 16fcb56c232b..598c24505c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -28,12 +28,13 @@ #include <linux/dma-fence-array.h> #include <linux/interval_tree_generic.h> #include <linux/idr.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_gmc.h" +#include "amdgpu_xgmi.h" /** * DOC: GPUVM @@ -66,50 +67,6 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef LAST /** - * struct amdgpu_pte_update_params - Local structure - * - * Encapsulate some VM table update parameters to reduce - * the number of function parameters - * - */ -struct 
amdgpu_pte_update_params { - - /** - * @adev: amdgpu device we do this update for - */ - struct amdgpu_device *adev; - - /** - * @vm: optional amdgpu_vm we do this update for - */ - struct amdgpu_vm *vm; - - /** - * @src: address where to copy page table entries from - */ - uint64_t src; - - /** - * @ib: indirect buffer to fill with commands - */ - struct amdgpu_ib *ib; - - /** - * @func: Function which actually does the update - */ - void (*func)(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, uint64_t pe, - uint64_t addr, unsigned count, uint32_t incr, - uint64_t flags); - /** - * @pages_addr: - * - * DMA addresses to use for mapping, used during VM update by CPU - */ - dma_addr_t *pages_addr; -}; - -/** * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback */ struct amdgpu_prt_cb { @@ -173,7 +130,8 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift; + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; else if (level != AMDGPU_VM_PTB) /* Everything in between */ return 512; @@ -183,6 +141,22 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, } /** + * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD + * + * @adev: amdgpu_device pointer + * + * Returns: + * The number of entries in the root page directory which needs the ATS setting. + */ +static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) +{ + unsigned shift; + + shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); + return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); +} + +/** * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT * * @adev: amdgpu_device pointer @@ -329,11 +303,11 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) return; vm->bulk_moveable = false; - if (bo->tbo.type == ttm_bo_type_kernel) + if (bo->tbo.type == ttm_bo_type_kernel && bo->parent) amdgpu_vm_bo_relocated(base); else amdgpu_vm_bo_idle(base); @@ -368,7 +342,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); } -/** +/* * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt */ struct amdgpu_vm_pt_cursor { @@ -505,61 +479,40 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev, } /** - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT + * amdgpu_vm_pt_first_dfs - start a deep first search * - * @adev: amdgpu_device pointer + * @adev: amdgpu_device structure * @vm: amdgpu_vm structure - * @start: start addr of the walk + * @start: optional cursor to start with * @cursor: state to initialize * - * Start a walk and go directly to the leaf node. - */ -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev, - struct amdgpu_vm *vm, uint64_t start, - struct amdgpu_vm_pt_cursor *cursor) -{ - amdgpu_vm_pt_start(adev, vm, start, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); -} - -/** - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk the PD/PT tree to the next leaf node. + * Starts a deep first traversal of the PD/PT tree. 
*/ -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev, +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start, struct amdgpu_vm_pt_cursor *cursor) { - amdgpu_vm_pt_next(adev, cursor); - if (cursor->pfn != ~0ll) - while (amdgpu_vm_pt_descendant(adev, cursor)); + if (start) + *cursor = *start; + else + amdgpu_vm_pt_start(adev, vm, 0, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); } /** - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy - */ -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \ - for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \ - (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor))) - -/** - * amdgpu_vm_pt_first_dfs - start a deep first search + * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue * - * @adev: amdgpu_device structure - * @vm: amdgpu_vm structure - * @cursor: state to initialize + * @start: starting point for the search + * @entry: current entry * - * Starts a deep first traversal of the PD/PT tree. + * Returns: + * True when the search should continue, false otherwise. */ -static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor) +static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_pt *entry) { - amdgpu_vm_pt_start(adev, vm, 0, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); + return entry && (!start || entry != start->entry); } /** @@ -584,14 +537,14 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, amdgpu_vm_pt_ancestor(cursor); } -/** +/* * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs */ -#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) \ - for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)), \ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ - (entry); (entry) = (cursor).entry, \ - amdgpu_vm_pt_next_dfs((adev), &(cursor))) + amdgpu_vm_pt_continue_dfs((start), (entry)); \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) /** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list @@ -615,6 +568,14 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } +/** + * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag + * + * @bo: BO which was removed from the LRU + * + * Make sure the bulk_moveable flag is updated when a BO is removed from the + * LRU. 
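For reference, a minimal standalone sketch of the cursor-based, children-first walk that for_each_amdgpu_vm_pt_dfs_safe() implements above. The node and cursor types here are hypothetical stand-ins, not the driver's structures, and the tree is assumed to have a single root with no sibling:

struct node {
	struct node *parent;
	struct node *child;	/* first child */
	struct node *sibling;	/* next sibling */
};

struct cursor {
	struct node *entry;
};

/* Descend as far as possible so children are visited before their parents. */
static void dfs_first(struct node *root, struct cursor *c)
{
	c->entry = root;
	while (c->entry && c->entry->child)
		c->entry = c->entry->child;
}

/* Advance: take the next sibling subtree if there is one, otherwise go up. */
static void dfs_next(struct cursor *c)
{
	struct node *n = c->entry;

	if (!n)
		return;
	if (n->sibling)
		dfs_first(n->sibling, c);
	else
		c->entry = n->parent;
}

/* "Safe" variant: the cursor is advanced before the entry is used, so the
 * loop body may free the entry it is handed. */
#define for_each_node_dfs_safe(root, c, e)				\
	for (dfs_first((root), &(c)), (e) = (c).entry, dfs_next(&(c));	\
	     (e); (e) = (c).entry, dfs_next(&(c)))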
+ */ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) { struct amdgpu_bo *abo; @@ -632,7 +593,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (abo->tbo.resv == vm->root.base.bo->tbo.resv) + if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) vm->bulk_moveable = false; } @@ -649,21 +610,18 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base; -#if 0 if (vm->bulk_moveable) { - spin_lock(&glob->lru_lock); + spin_lock(&ttm_bo_glob.lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&glob->lru_lock); + spin_unlock(&ttm_bo_glob.lru_lock); return; } -#endif memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); - spin_lock(&glob->lru_lock); + spin_lock(&ttm_bo_glob.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo; @@ -675,7 +633,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, ttm_bo_move_to_lru_tail(&bo->shadow->tbo, &vm->lru_bulk_move); } - spin_unlock(&glob->lru_lock); + spin_unlock(&ttm_bo_glob.lru_lock); vm->bulk_moveable = true; } @@ -712,18 +670,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); } else { - if (vm->use_cpu_for_update) - r = amdgpu_bo_kmap(bo, NULL); + vm->update_funcs->map_table(bo); + if (bo->parent) + amdgpu_vm_bo_relocated(bo_base); else - r = amdgpu_ttm_alloc_gart(&bo->tbo); - if (r) - break; - if (bo->shadow) { - r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo); - if (r) - break; - } - amdgpu_vm_bo_relocated(bo_base); + amdgpu_vm_bo_idle(bo_base); } } @@ -751,8 +702,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear - * @level: level this BO is at - * @pte_support_ats: indicate ATS support from PTE + * @direct: use a direct update * * Root PD needs to be reserved when calling this. * @@ -760,99 +710,114 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * 0 on success, errno otherwise. 
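The reworked clear path above is the first user of the small backend vtable (vm->update_funcs) through which all page-table writes now go, so the same logic can run either as CPU writes or as SDMA indirect buffers. A rough, self-contained sketch of that shape follows; all names here are hypothetical and the real callbacks take more context:

#include <stddef.h>
#include <stdint.h>

struct pt_bo;		/* a page-table buffer object */
struct pt_fence;	/* completion fence */

struct pt_update_params {
	void *backend_state;	/* CPU mapping or a half-built IB, per backend */
};

/* One backend implements these with CPU writes, another by building SDMA IBs. */
struct pt_update_funcs {
	int (*map_table)(struct pt_bo *bo);
	int (*prepare)(struct pt_update_params *p, void *owner);
	int (*update)(struct pt_update_params *p, struct pt_bo *bo,
		      uint64_t pe, uint64_t addr, unsigned int count,
		      uint32_t incr, uint64_t flags);
	int (*commit)(struct pt_update_params *p, struct pt_fence **fence);
};

/* Clearing a freshly allocated table becomes backend-agnostic. */
static int clear_table(const struct pt_update_funcs *funcs,
		       struct pt_update_params *p, struct pt_bo *bo,
		       unsigned int entries)
{
	int r;

	r = funcs->map_table(bo);
	if (r)
		return r;
	r = funcs->prepare(p, NULL);
	if (r)
		return r;
	/* write `entries` invalid PTEs starting at offset 0 */
	r = funcs->update(p, bo, 0, 0, entries, 0, 0);
	if (r)
		return r;
	return funcs->commit(p, NULL);
}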
*/ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_bo *bo, - unsigned level, bool pte_support_ats) + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + bool direct) { struct ttm_operation_ctx ctx = { true, false }; - struct dma_fence *fence = NULL; + unsigned level = adev->vm_manager.root_level; + struct amdgpu_vm_update_params params; + struct amdgpu_bo *ancestor = bo; unsigned entries, ats_entries; - struct amdgpu_ring *ring; - struct amdgpu_job *job; uint64_t addr; int r; + /* Figure out our place in the hierarchy */ + if (ancestor->parent) { + ++level; + while (ancestor->parent->parent) { + ++level; + ancestor = ancestor->parent; + } + } + entries = amdgpu_bo_size(bo) / 8; + if (!vm->pte_support_ats) { + ats_entries = 0; - if (pte_support_ats) { - if (level == adev->vm_manager.root_level) { - ats_entries = amdgpu_vm_level_shift(adev, level); - ats_entries += AMDGPU_GPU_PAGE_SHIFT; - ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries; - ats_entries = min(ats_entries, entries); - entries -= ats_entries; + } else if (!bo->parent) { + ats_entries = amdgpu_vm_num_ats_entries(adev); + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + + } else { + struct amdgpu_vm_pt *pt; + + pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base); + ats_entries = amdgpu_vm_num_ats_entries(adev); + if ((pt - vm->root.entries) >= ats_entries) { + ats_entries = 0; } else { ats_entries = entries; entries = 0; } - } else { - ats_entries = 0; } - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) - goto error; + return r; + + if (bo->shadow) { + r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement, + &ctx); + if (r) + return r; + } - r = amdgpu_ttm_alloc_gart(&bo->tbo); + r = vm->update_funcs->map_table(bo); if (r) return r; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + params.direct = direct; + + r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL); if (r) - goto error; + return r; - addr = amdgpu_bo_gpu_offset(bo); + addr = 0; if (ats_entries) { - uint64_t ats_value; + uint64_t value = 0, flags; - ats_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - ats_value |= AMDGPU_PDE_PTE; + flags = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + } + + r = vm->update_funcs->update(¶ms, bo, addr, 0, ats_entries, + value, flags); + if (r) + return r; - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, - ats_entries, 0, ats_value); addr += ats_entries * 8; } if (entries) { - uint64_t value = 0; - - /* Workaround for fault priority problem on GMC9 */ - if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10) - value = AMDGPU_PTE_EXECUTABLE; + uint64_t value = 0, flags = 0; + + if (adev->asic_type >= CHIP_VEGA10) { + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; + } + } - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, - entries, 0, value); + r = vm->update_funcs->update(¶ms, bo, addr, 0, entries, + value, flags); + if (r) + return r; } - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - - WARN_ON(job->ibs[0].length_dw > 64); - r = 
amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, - AMDGPU_FENCE_OWNER_KFD, false); - if (r) - goto error_free; - - r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED, - &fence); - if (r) - goto error_free; - - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - - if (bo->shadow) - return amdgpu_vm_clear_bo(adev, vm, bo->shadow, - level, pte_support_ats); - - return 0; - -error_free: - amdgpu_job_free(job); - -error: - return r; + return vm->update_funcs->commit(¶ms, NULL); } /** @@ -860,10 +825,13 @@ error: * * @adev: amdgpu_device pointer * @vm: requesting vm + * @level: the page table level + * @direct: use a direct update * @bp: resulting BO allocation parameters */ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int level, struct amdgpu_bo_param *bp) + int level, bool direct, + struct amdgpu_bo_param *bp) { memset(bp, 0, sizeof(*bp)); @@ -878,94 +846,64 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, else if (!vm->root.base.bo || vm->root.base.bo->shadow) bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; + bp->no_wait_gpu = direct; if (vm->root.base.bo) - bp->resv = vm->root.base.bo->tbo.resv; + bp->resv = vm->root.base.bo->tbo.base.resv; } /** - * amdgpu_vm_alloc_pts - Allocate page tables. + * amdgpu_vm_alloc_pts - Allocate a specific page table * * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for - * @saddr: Start address which needs to be allocated - * @size: Size from start address we need. + * @cursor: Which page table to allocate + * @direct: use a direct update * - * Make sure the page directories and page tables are allocated + * Make sure a specific page table or directory is allocated. * * Returns: - * 0 on success, errno otherwise. + * 1 if page table needed to be allocated, 0 if page table was already + * allocated, negative errno if an error occurred. 
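With this change, a missing page table is allocated from inside the PTE walk rather than up front by the caller. A compressed sketch of that lazy-allocation step, using hypothetical types and plain calloc() in place of BO creation:

#include <stdbool.h>
#include <stdlib.h>

struct pt_node {
	struct pt_node *parent;
	struct pt_node **entries;	/* children, allocated on first touch */
	void *table;			/* backing page-table memory */
};

static int alloc_pt_if_missing(struct pt_node *node, bool is_leaf,
			       unsigned int num_entries, size_t table_size)
{
	/* interior nodes get their child-pointer array on first touch */
	if (!is_leaf && !node->entries) {
		node->entries = calloc(num_entries, sizeof(*node->entries));
		if (!node->entries)
			return -1;		/* -ENOMEM in the driver */
	}

	if (node->table)
		return 0;			/* already allocated, nothing to do */

	node->table = calloc(1, table_size);	/* driver: create and clear a BO */
	if (!node->table)
		return -1;
	return 0;
}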
*/ -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - uint64_t saddr, uint64_t size) +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor, + bool direct) { - struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_pt *entry = cursor->entry; + struct amdgpu_bo_param bp; struct amdgpu_bo *pt; - bool ats = false; - uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) - return -EINVAL; - - eaddr = saddr + size - 1; - - if (vm->pte_support_ats) - ats = saddr < AMDGPU_GMC_HOLE_START; - - saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + if (cursor->level < AMDGPU_VM_PTB && !entry->entries) { + unsigned num_entries; - if (eaddr >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", - eaddr, adev->vm_manager.max_pfn); - return -EINVAL; + num_entries = amdgpu_vm_num_entries(adev, cursor->level); + entry->entries = kvmalloc_array(num_entries, + sizeof(*entry->entries), + GFP_KERNEL | __GFP_ZERO); + if (!entry->entries) + return -ENOMEM; } - for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) { - struct amdgpu_vm_pt *entry = cursor.entry; - struct amdgpu_bo_param bp; - - if (cursor.level < AMDGPU_VM_PTB) { - unsigned num_entries; - - num_entries = amdgpu_vm_num_entries(adev, cursor.level); - entry->entries = kvmalloc_array(num_entries, - sizeof(*entry->entries), - GFP_KERNEL | - __GFP_ZERO); - if (!entry->entries) - return -ENOMEM; - } - - - if (entry->base.bo) - continue; + if (entry->base.bo) + return 0; - amdgpu_vm_bo_param(adev, vm, cursor.level, &bp); + amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp); - r = amdgpu_bo_create(adev, &bp, &pt); - if (r) - return r; - - if (vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(pt, NULL); - if (r) - goto error_free_pt; - } - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt->parent = amdgpu_bo_ref(cursor.parent->base.bo); + r = amdgpu_bo_create(adev, &bp, &pt); + if (r) + return r; - amdgpu_vm_bo_base_init(&entry->base, vm, pt); + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); + amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats); - if (r) - goto error_free_pt; - } + r = amdgpu_vm_clear_bo(adev, vm, pt, direct); + if (r) + goto error_free_pt; return 0; @@ -976,31 +914,45 @@ error_free_pt: } /** + * amdgpu_vm_free_table - fre one PD/PT + * + * @entry: PDE to free + */ +static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry) +{ + if (entry->base.bo) { + entry->base.bo->vm_bo = NULL; + list_del(&entry->base.vm_status); + amdgpu_bo_unref(&entry->base.bo->shadow); + amdgpu_bo_unref(&entry->base.bo); + } + kvfree(entry->entries); + entry->entries = NULL; +} + +/** * amdgpu_vm_free_pts - free PD/PT levels * * @adev: amdgpu device structure * @vm: amdgpu vm structure + * @start: optional cursor where to start freeing PDs/PTs * * Free the page directory or page table level and all sub levels. 
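Because the walk yields children before parents, freeing a subtree reduces to freeing each entry as it is visited. A recursive equivalent for illustration only, again with a hypothetical node type:

#include <stdlib.h>

struct pt_node {
	struct pt_node *child;		/* first child */
	struct pt_node *sibling;	/* next sibling */
	void *table;			/* backing page-table memory */
};

/* Free children first, then the node itself, mirroring the bottom-up order
 * of the cursor walk used by the free path above. */
static void free_subtree(struct pt_node *node)
{
	struct pt_node *child = node->child;

	while (child) {
		struct pt_node *next = child->sibling;

		free_subtree(child);
		child = next;
	}
	free(node->table);
	free(node);
}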
*/ static void amdgpu_vm_free_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start) { struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_pt *entry; - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) { + vm->bulk_moveable = false; - if (entry->base.bo) { - entry->base.bo->vm_bo = NULL; - list_del(&entry->base.vm_status); - amdgpu_bo_unref(&entry->base.bo->shadow); - amdgpu_bo_unref(&entry->base.bo); - } - kvfree(entry->entries); - } + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + amdgpu_vm_free_table(entry); - BUG_ON(vm->root.base.bo); + if (start) + amdgpu_vm_free_table(start->entry); } /** @@ -1086,7 +1038,8 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, * Returns: * 0 on success, errno otherwise. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, + bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -1100,10 +1053,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; - bool pasid_mapping_needed = id->pasid != job->pasid || - !id->pasid_mapping || - !dma_fence_is_signaled(id->pasid_mapping); struct dma_fence *fence = NULL; + bool pasid_mapping_needed = false; unsigned patch_offset = 0; int r; @@ -1113,6 +1064,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ pasid_mapping_needed = true; } + mutex_lock(&id_mgr->lock); + if (id->pasid != job->pasid || !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping)) + pasid_mapping_needed = true; + mutex_unlock(&id_mgr->lock); + gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; @@ -1152,9 +1109,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ } if (pasid_mapping_needed) { + mutex_lock(&id_mgr->lock); id->pasid = job->pasid; dma_fence_put(id->pasid_mapping); id->pasid_mapping = dma_fence_get(fence); + mutex_unlock(&id_mgr->lock); } dma_fence_put(fence); @@ -1212,66 +1171,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, } /** - * amdgpu_vm_do_set_ptes - helper to call the right asic function - * - * @params: see amdgpu_pte_update_params definition - * @bo: PD/PT to update - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Traces the parameters and calls the right asic functions - * to setup the page table using the DMA. 
- */ -static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); - - if (count < 3) { - amdgpu_vm_write_pte(params->adev, params->ib, pe, - addr | flags, count, incr); - - } else { - amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, - count, incr, flags); - } -} - -/** - * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART - * - * @params: see amdgpu_pte_update_params definition - * @bo: PD/PT to update - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Traces the parameters and calls the DMA function to copy the PTEs. - */ -static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - uint64_t src = (params->src + (addr >> 12) * 8); - - pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_copy_ptes(pe, src, count); - - amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); -} - -/** * amdgpu_vm_map_gart - Resolve gart mapping of addr * * @pages_addr: optional DMA address to use for lookup @@ -1283,7 +1182,7 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, * Returns: * The pointer for the page table entry. */ -static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { uint64_t result; @@ -1299,90 +1198,33 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) } /** - * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU - * - * @params: see amdgpu_pte_update_params definition - * @bo: PD/PT to update - * @pe: kmap addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Write count number of PT/PD entries directly. - */ -static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - unsigned int i; - uint64_t value; - - pe += (unsigned long)amdgpu_bo_kptr(bo); - - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); - - for (i = 0; i < count; i++) { - value = params->pages_addr ? - amdgpu_vm_map_gart(params->pages_addr, addr) : - addr; - amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); - addr += incr; - } -} - -/** - * amdgpu_vm_update_func - helper to call update function - * - * Calls the update function for both the given BO as well as its shadow. - */ -static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - if (bo->shadow) - params->func(params, bo->shadow, pe, addr, count, incr, flags); - params->func(params, bo, pe, addr, count, incr, flags); -} - -/* * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @param: parameters for the update + * @params: parameters for the update * @vm: requested vm - * @parent: parent directory * @entry: entry to update * * Makes sure the requested entry in parent is up to date. 
*/ -static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - struct amdgpu_vm_pt *entry) +static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *entry) { + struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry); struct amdgpu_bo *bo = parent->base.bo, *pbo; uint64_t pde, pt, flags; unsigned level; - /* Don't update huge pages here */ - if (entry->huge) - return; - for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; level += params->adev->vm_manager.root_level; amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags); pde = (entry - parent->entries) * 8; - amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags); + return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); } -/* +/** * amdgpu_vm_invalidate_pds - mark all PDs as invalid * * @adev: amdgpu_device pointer @@ -1396,117 +1238,69 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_pt *entry; - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) if (entry->base.bo && !entry->base.moved) amdgpu_vm_bo_relocated(&entry->base); } -/* - * amdgpu_vm_update_directories - make sure that all directories are valid +/** + * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer * @vm: requested vm + * @direct: submit directly to the paging queue * * Makes sure all directories are up to date. * * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_update_directories(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct) { - struct amdgpu_pte_update_params params; - struct amdgpu_job *job; - unsigned ndw = 0; - int r = 0; + struct amdgpu_vm_update_params params; + int r; if (list_empty(&vm->relocated)) return 0; -restart: memset(¶ms, 0, sizeof(params)); params.adev = adev; + params.vm = vm; + params.direct = direct; - if (vm->use_cpu_for_update) { - r = amdgpu_bo_sync_wait(vm->root.base.bo, - AMDGPU_FENCE_OWNER_VM, true); - if (unlikely(r)) - return r; - - params.func = amdgpu_vm_cpu_set_ptes; - } else { - ndw = 512 * 8; - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; - - params.ib = &job->ibs[0]; - params.func = amdgpu_vm_do_set_ptes; - } + r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_VM, NULL); + if (r) + return r; while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_pt *pt, *entry; + struct amdgpu_vm_pt *entry; entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt, base.vm_status); amdgpu_vm_bo_idle(&entry->base); - pt = amdgpu_vm_pt_parent(entry); - if (!pt) - continue; - - amdgpu_vm_update_pde(¶ms, vm, pt, entry); - - if (!vm->use_cpu_for_update && - (ndw - params.ib->length_dw) < 32) - break; - } - - if (vm->use_cpu_for_update) { - /* Flush HDP */ - mb(); - amdgpu_asic_flush_hdp(adev, NULL); - } else if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - struct amdgpu_bo *root = vm->root.base.bo; - struct amdgpu_ring *ring; - struct dma_fence *fence; - - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, - sched); - - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, &vm->entity, 
AMDGPU_FENCE_OWNER_VM, - &fence); + r = amdgpu_vm_update_pde(¶ms, vm, entry); if (r) goto error; - - amdgpu_bo_fence(root, fence, true); - dma_fence_put(vm->last_update); - vm->last_update = fence; } - if (!list_empty(&vm->relocated)) - goto restart; - + r = vm->update_funcs->commit(¶ms, &vm->last_update); + if (r) + goto error; return 0; error: amdgpu_vm_invalidate_pds(adev, vm); - amdgpu_job_free(job); return r; } -/** +/* * amdgpu_vm_update_flags - figure out flags for PTE updates * * Make sure to set the right flags for the PTEs at the desired level. */ -static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, unsigned level, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1525,13 +1319,14 @@ static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } - amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); + params->vm->update_funcs->update(params, bo, pe, addr, count, incr, + flags); } /** * amdgpu_vm_fragment - get fragment for PTEs * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @start: first PTE to handle * @end: last PTE to handle * @flags: hw mapping flags @@ -1540,7 +1335,7 @@ static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, * * Returns the first possible fragment for the start and end address. */ -static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t flags, unsigned int *frag, uint64_t *frag_end) { @@ -1573,7 +1368,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, max_frag = 31; /* system pages are non continuously */ - if (params->src) { + if (params->pages_addr) { *frag = 0; *frag_end = end; return; @@ -1592,7 +1387,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, /** * amdgpu_vm_update_ptes - make sure that page tables are valid * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @start: start of GPU address range * @end: end of GPU address range * @dst: destination address to map to, the next dst inside the function @@ -1603,7 +1398,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, * Returns: * 0 for success, -EINVAL for failure. 
*/ -static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { @@ -1611,6 +1406,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt_cursor cursor; uint64_t frag_start = start, frag_end; unsigned int frag; + int r; /* figure out the initial fragment */ amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); @@ -1618,12 +1414,19 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, /* walk over the address space and update the PTs */ amdgpu_vm_pt_start(adev, params->vm, start, &cursor); while (cursor.pfn < end) { - struct amdgpu_bo *pt = cursor.entry->base.bo; unsigned shift, parent_shift, mask; uint64_t incr, entry_end, pe_start; + struct amdgpu_bo *pt; - if (!pt) - return -ENOENT; + /* make sure that the page tables covering the address range are + * actually allocated + */ + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, + params->direct); + if (r) + return r; + + pt = cursor.entry->base.bo; /* The root level can't be a huge page */ if (cursor.level == adev->vm_manager.root_level) { @@ -1632,16 +1435,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; } - /* If it isn't already handled it can't be a huge page */ - if (cursor.entry->huge) { - /* Add the entry to the relocated list to update it. */ - cursor.entry->huge = false; - amdgpu_vm_bo_relocated(&cursor.entry->base); - } - shift = amdgpu_vm_level_shift(adev, cursor.level); parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (adev->asic_type < CHIP_VEGA10) { + if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { /* No huge page support before GMC v9 */ if (cursor.level != AMDGPU_VM_PTB) { if (!amdgpu_vm_pt_descendant(adev, &cursor)) @@ -1697,9 +1494,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Mark all child entries as huge */ + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ while (cursor.pfn < frag_start) { - cursor.entry->huge = true; + amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); } @@ -1716,13 +1518,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer - * @exclusive: fence we need to sync to - * @pages_addr: DMA addresses to use for mapping * @vm: requested vm + * @direct: direct submission in a page fault + * @exclusive: fence we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @addr: addr to set the area to + * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * * Fill in the page table entries between @start and @last. @@ -1731,144 +1534,36 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * 0 for success, -EINVAL for failure. 
*/ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct, struct dma_fence *exclusive, - dma_addr_t *pages_addr, - struct amdgpu_vm *vm, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, + dma_addr_t *pages_addr, struct dma_fence **fence) { - struct amdgpu_ring *ring; + struct amdgpu_vm_update_params params; void *owner = AMDGPU_FENCE_OWNER_VM; - unsigned nptes, ncmds, ndw; - struct amdgpu_job *job; - struct amdgpu_pte_update_params params; - struct dma_fence *f = NULL; int r; memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; + params.pages_addr = pages_addr; /* sync to everything except eviction fences on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - if (vm->use_cpu_for_update) { - /* params.src is used as flag to indicate system Memory */ - if (pages_addr) - params.src = ~0; - - /* Wait for PT BOs to be idle. PTs share the same resv. object - * as the root PD BO - */ - r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true); - if (unlikely(r)) - return r; - - /* Wait for any BO move to be completed */ - if (exclusive) { - r = dma_fence_wait(exclusive, true); - if (unlikely(r)) - return r; - } - - params.func = amdgpu_vm_cpu_set_ptes; - params.pages_addr = pages_addr; - return amdgpu_vm_update_ptes(¶ms, start, last + 1, - addr, flags); - } - - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - - nptes = last - start + 1; - - /* - * reserve space for two commands every (1 << BLOCK_SIZE) - * entries or 2k dwords (whatever is smaller) - */ - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); - - /* The second command is for the shadow pagetables. */ - if (vm->root.base.bo->shadow) - ncmds *= 2; - - /* padding, etc. */ - ndw = 64; - - if (pages_addr) { - /* copy commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; - - /* and also PTEs */ - ndw += nptes * 2; - - params.func = amdgpu_vm_do_copy_ptes; - - } else { - /* set page commands needed */ - ndw += ncmds * 10; - - /* extra commands for begin/end fragments */ - ncmds = 2 * adev->vm_manager.fragment_size; - if (vm->root.base.bo->shadow) - ncmds *= 2; - - ndw += 10 * ncmds; - - params.func = amdgpu_vm_do_set_ptes; - } - - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + r = vm->update_funcs->prepare(¶ms, owner, exclusive); if (r) return r; - params.ib = &job->ibs[0]; - - if (pages_addr) { - uint64_t *pte; - unsigned i; - - /* Put the PTEs at the end of the IB. 
*/ - i = ndw - nptes * 2; - pte= (uint64_t *)&(job->ibs->ptr[i]); - params.src = job->ibs->gpu_addr + i * 4; - - for (i = 0; i < nptes; ++i) { - pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i * - AMDGPU_GPU_PAGE_SIZE); - pte[i] |= flags; - } - addr = 0; - } - - r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); - if (r) - goto error_free; - - r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner, false); - if (r) - goto error_free; - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) - goto error_free; - - amdgpu_ring_pad_ib(ring, params.ib); - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); - if (r) - goto error_free; - - amdgpu_bo_fence(vm->root.base.bo, f, true); - dma_fence_put(*fence); - *fence = f; - return 0; + return r; -error_free: - amdgpu_job_free(job); - return r; + return vm->update_funcs->commit(¶ms, fence); } /** @@ -1880,6 +1575,7 @@ error_free: * @vm: requested vm * @mapping: mapped range and flags to use for the update * @flags: HW flags for the mapping + * @bo_adev: amdgpu_device pointer that bo actually been allocated * @nodes: array of drm_mm_nodes with the MC addresses * @fence: optional resulting fence * @@ -1895,6 +1591,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, uint64_t flags, + struct amdgpu_device *bo_adev, struct drm_mm_node *nodes, struct dma_fence **fence) { @@ -1910,17 +1607,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) flags &= ~AMDGPU_PTE_WRITEABLE; - flags &= ~AMDGPU_PTE_EXECUTABLE; - flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - - flags &= ~AMDGPU_PTE_MTYPE_MASK; - flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK); - - if ((mapping->flags & AMDGPU_PTE_PRT) && - (adev->asic_type >= CHIP_VEGA10)) { - flags |= AMDGPU_PTE_PRT; - flags &= ~AMDGPU_PTE_VALID; - } + /* Apply ASIC specific mapping flags */ + amdgpu_gmc_get_vm_pte(adev, mapping, &flags); trace_amdgpu_vm_bo_update(mapping); @@ -1949,7 +1637,6 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (pages_addr) { uint64_t count; - max_entries = min(max_entries, 16ull * 1024ull); for (count = 1; count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; ++count) { @@ -1965,18 +1652,19 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, dma_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; + max_entries = count * + AMDGPU_GPU_PAGES_IN_CPU_PAGE; } } else if (flags & AMDGPU_PTE_VALID) { - addr += adev->vm_manager.vram_base_offset; + addr += bo_adev->vm_manager.vram_base_offset; addr += pfn << PAGE_SHIFT; } last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive, start, last, flags, addr, - fence); + dma_addr, fence); if (r) return r; @@ -2004,8 +1692,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * Returns: * 0 for success, -EINVAL for failure. 
*/ -int amdgpu_vm_bo_update(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va, +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; @@ -2016,6 +1703,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct drm_mm_node *nodes; struct dma_fence *exclusive, **last_update; uint64_t flags; + struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -2031,15 +1719,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo->tbo.resv); + exclusive = bo->tbo.moving; } - if (bo) + if (bo) { flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); - else + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + } else { flags = 0x0; + } - if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) + if (clear || (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -2054,26 +1744,21 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, list_for_each_entry(mapping, &bo_va->invalids, list) { r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, - mapping, flags, nodes, + mapping, flags, bo_adev, nodes, last_update); if (r) return r; } - if (vm->use_cpu_for_update) { - /* Flush HDP */ - mb(); - amdgpu_asic_flush_hdp(adev, NULL); - } - /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { uint32_t mem_type = bo->tbo.mem.mem_type; - if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) + if (!(bo->preferred_domains & + amdgpu_mem_type_to_domain(mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); @@ -2206,18 +1891,18 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct reservation_object *resv = vm->root.base.bo->tbo.resv; + struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; struct dma_fence *excl, **shared; unsigned i, shared_count; int r; - r = reservation_object_get_fences_rcu(resv, &excl, + r = dma_resv_get_fences_rcu(resv, &excl, &shared_count, &shared); if (r) { /* Not enough memory to grab the fence list, as last resort * block for all the fences to complete. 
*/ - reservation_object_wait_timeout_rcu(resv, true, false, + dma_resv_wait_timeout_rcu(resv, true, false, MAX_SCHEDULE_TIMEOUT); return; } @@ -2267,9 +1952,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL, mapping->start, mapping->last, - init_pte_value, 0, &f); + init_pte_value, 0, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2305,7 +1990,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va *bo_va, *tmp; - struct reservation_object *resv; + struct dma_resv *resv; bool clear; int r; @@ -2320,11 +2005,11 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - resv = bo_va->base.bo->tbo.resv; + resv = bo_va->base.bo->tbo.base.resv; spin_unlock(&vm->invalidated_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!amdgpu_vm_debug && reservation_object_trylock(resv)) + if (!amdgpu_vm_debug && dma_resv_trylock(resv)) clear = false; /* Somebody else is using the BO right now */ else @@ -2335,7 +2020,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, return r; if (!clear) - reservation_object_unlock(resv); + dma_resv_unlock(resv); spin_lock(&vm->invalidated_lock); } spin_unlock(&vm->invalidated_lock); @@ -2374,6 +2059,16 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); + if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) && + (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)) { + bo_va->is_xgmi = true; + mutex_lock(&adev->vm_manager.lock_pstate); + /* Power up XGMI if it can be potentially used */ + if (++adev->vm_manager.xgmi_map_counter == 1) + amdgpu_xgmi_set_pstate(adev, 1); + mutex_unlock(&adev->vm_manager.lock_pstate); + } + return bo_va; } @@ -2401,7 +2096,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (mapping->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && + if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && !bo_va->base.moved) { list_move(&bo_va->base.vm_status, &vm->moved); } @@ -2733,7 +2428,8 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) struct amdgpu_bo *bo; bo = mapping->bo_va->base.bo; - if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) + if (dma_resv_locking_ctx(bo->tbo.base.resv) != + ticket) continue; } @@ -2760,7 +2456,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_vm_bo_base **base; if (bo) { - if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) vm->bulk_moveable = false; for (base = &bo_va->base.bo->vm_bo; *base; @@ -2792,6 +2488,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, } dma_fence_put(bo_va->last_pt_update); + + if (bo && bo_va->is_xgmi) { + mutex_lock(&adev->vm_manager.lock_pstate); + if (--adev->vm_manager.xgmi_map_counter == 0) + amdgpu_xgmi_set_pstate(adev, 0); + mutex_unlock(&adev->vm_manager.lock_pstate); + } + kfree(bo_va); } @@ -2816,7 +2520,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - 
if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -2827,7 +2531,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -2949,20 +2653,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, adev->vm_manager.fragment_size); } -static struct amdgpu_retryfault_hashtable *init_fault_hash(void) +/** + * amdgpu_vm_wait_idle - wait for the VM to become idle + * + * @vm: VM object to wait for + * @timeout: timeout to wait for VM to become idle + */ +long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - struct amdgpu_retryfault_hashtable *fault_hash; - - fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL); - if (!fault_hash) - return fault_hash; - - INIT_CHASH_TABLE(fault_hash->hash, - AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spin_lock_init(&fault_hash->lock); - fault_hash->count = 0; - - return fault_hash; + return dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, + true, true, timeout); } /** @@ -2996,12 +2696,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); - /* create scheduler entity for page table updates */ - r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs, + /* create scheduler entities for page table updates */ + r = drm_sched_entity_init(&vm->direct, adev->vm_manager.vm_pte_rqs, adev->vm_manager.vm_pte_num_rqs, NULL); if (r) return r; + r = drm_sched_entity_init(&vm->delayed, adev->vm_manager.vm_pte_rqs, + adev->vm_manager.vm_pte_num_rqs, NULL); + if (r) + goto error_free_direct; + vm->pte_support_ats = false; if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { @@ -3016,30 +2721,34 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? 
"CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && + !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); + + if (vm->use_cpu_for_update) + vm->update_funcs = &amdgpu_vm_cpu_funcs; + else + vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); + amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) - goto error_free_sched_entity; + goto error_free_delayed; r = amdgpu_bo_reserve(root, true); if (r) goto error_free_root; - r = reservation_object_reserve_shared(root->tbo.resv, 1); + r = dma_resv_reserve_shared(root->tbo.base.resv, 1); if (r) goto error_unreserve; amdgpu_vm_bo_base_init(&vm->root.base, vm, root); - r = amdgpu_vm_clear_bo(adev, vm, root, - adev->vm_manager.root_level, - vm->pte_support_ats); + r = amdgpu_vm_clear_bo(adev, vm, root, false); if (r) goto error_unreserve; @@ -3058,12 +2767,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pasid = pasid; } - vm->fault_hash = init_fault_hash(); - if (!vm->fault_hash) { - r = -ENOMEM; - goto error_free_root; - } - INIT_KFIFO(vm->faults); return 0; @@ -3076,17 +2779,52 @@ error_free_root: amdgpu_bo_unref(&vm->root.base.bo); vm->root.base.bo = NULL; -error_free_sched_entity: - drm_sched_entity_destroy(&vm->entity); +error_free_delayed: + drm_sched_entity_destroy(&vm->delayed); + +error_free_direct: + drm_sched_entity_destroy(&vm->direct); return r; } /** + * amdgpu_vm_check_clean_reserved - check if a VM is clean + * + * @adev: amdgpu_device pointer + * @vm: the VM to check + * + * check all entries of the root PD, if any subsequent PDs are allocated, + * it means there are page table creating and filling, and is no a clean + * VM + * + * Returns: + * 0 if this VM is clean + */ +static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + enum amdgpu_vm_level root = adev->vm_manager.root_level; + unsigned int entries = amdgpu_vm_num_entries(adev, root); + unsigned int i = 0; + + if (!(vm->root.entries)) + return 0; + + for (i = 0; i < entries; i++) { + if (vm->root.entries[i].base.bo) + return -EINVAL; + } + + return 0; +} + +/** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * * @adev: amdgpu_device pointer * @vm: requested vm + * @pasid: pasid to use * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. @@ -3102,7 +2840,8 @@ error_free_sched_entity: * Returns: * 0 for success, -errno for errors. */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned int pasid) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -3112,10 +2851,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns return r; /* Sanity checks */ - if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { - r = -EINVAL; + r = amdgpu_vm_check_clean_reserved(adev, vm); + if (r) goto unreserve_bo; - } if (pasid) { unsigned long flags; @@ -3134,9 +2872,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns * changing any other state, in case it fails. 
*/ if (pte_support_ats != vm->pte_support_ats) { - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, - adev->vm_manager.root_level, - pte_support_ats); + vm->pte_support_ats = pte_support_ats; + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); if (r) goto free_idr; } @@ -3144,12 +2881,19 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - vm->pte_support_ats = pte_support_ats; DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && + !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); + if (vm->use_cpu_for_update) + vm->update_funcs = &amdgpu_vm_cpu_funcs; + else + vm->update_funcs = &amdgpu_vm_sdma_funcs; + dma_fence_put(vm->last_update); + vm->last_update = NULL; + if (vm->pasid) { unsigned long flags; @@ -3219,27 +2963,38 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; - u64 fault; - int i, r; + int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - /* Clear pending page faults from IH when the VM is destroyed */ - while (kfifo_get(&vm->faults, &fault)) - amdgpu_vm_clear_fault(vm->fault_hash, fault); - + root = amdgpu_bo_ref(vm->root.base.bo); + amdgpu_bo_reserve(root, true); if (vm->pasid) { unsigned long flags; spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + vm->pasid = 0; } - kfree(vm->fault_hash); - vm->fault_hash = NULL; + list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { + if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { + amdgpu_vm_prt_fini(adev, vm); + prt_fini_needed = false; + } - drm_sched_entity_destroy(&vm->entity); + list_del(&mapping->list); + amdgpu_vm_free_mapping(adev, vm, mapping, NULL); + } + + amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_bo_unreserve(root); + amdgpu_bo_unref(&root); + WARN_ON(vm->root.base.bo); + + drm_sched_entity_destroy(&vm->direct); + drm_sched_entity_destroy(&vm->delayed); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -3252,25 +3007,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) list_del(&mapping->list); kfree(mapping); } - list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { - amdgpu_vm_prt_fini(adev, vm); - prt_fini_needed = false; - } - - list_del(&mapping->list); - amdgpu_vm_free_mapping(adev, vm, mapping, NULL); - } - root = amdgpu_bo_ref(vm->root.base.bo); - r = amdgpu_bo_reserve(root, true); - if (r) { - dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); - } else { - amdgpu_vm_free_pts(adev, vm); - amdgpu_bo_unreserve(root); - } - amdgpu_bo_unref(&root); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vmid_free_reserved(adev, vm, i); @@ -3315,6 +3052,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) idr_init(&adev->vm_manager.pasid_idr); spin_lock_init(&adev->vm_manager.pasid_lock); + + adev->vm_manager.xgmi_map_counter = 0; + mutex_init(&adev->vm_manager.lock_pstate); } /** @@ -3351,13 +3091,14 
@@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: - /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + /* We only need to reserve the vmid from the gfxhub */ + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, + AMDGPU_GFXHUB_0); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); break; default: return -EINVAL; @@ -3395,88 +3136,88 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, */ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) { - if (!vm->task_info.pid) { - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); + if (vm->task_info.pid) + return; - if (current->group_leader->mm == current->mm) { - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); - } - } + vm->task_info.pid = current->pid; + get_task_comm(vm->task_info.task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info.tgid = current->group_leader->pid; + get_task_comm(vm->task_info.process_name, current->group_leader); } /** - * amdgpu_vm_add_fault - Add a page fault record to fault hash table - * - * @fault_hash: fault hash table - * @key: 64-bit encoding of PASID and address - * - * This should be called when a retry page fault interrupt is - * received. If this is a new page fault, it will be added to a hash - * table. The return value indicates whether this is a new fault, or - * a fault that was already known and is already being handled. - * - * If there are too many pending page faults, this will fail. Retry - * interrupts should be ignored in this case until there is enough - * free space. + * amdgpu_vm_handle_fault - graceful handling of VM faults. + * @adev: amdgpu device pointer + * @pasid: PASID of the VM + * @addr: Address of the fault * - * Returns 0 if the fault was added, 1 if the fault was already known, - * -ENOSPC if there are too many pending faults. + * Try to gracefully handle a VM fault. Return true if the fault was handled and + * shouldn't be reported any more. 
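+ * + * Depending on amdgpu_vm_fault_stop this either redirects the faulting address + * to the dummy page or updates the PTE so the hardware retries silently. The + * page table update is submitted directly to the paging queue.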
*/ -int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr) { - unsigned long flags; - int r = -ENOSPC; - - if (WARN_ON_ONCE(!fault_hash)) - /* Should be allocated in amdgpu_vm_init - */ - return r; - - spin_lock_irqsave(&fault_hash->lock, flags); + struct amdgpu_bo *root; + uint64_t value, flags; + struct amdgpu_vm *vm; + long r; - /* Only let the hash table fill up to 50% for best performance */ - if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) - goto unlock_out; + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm) + root = amdgpu_bo_ref(vm->root.base.bo); + else + root = NULL; + spin_unlock(&adev->vm_manager.pasid_lock); - r = chash_table_copy_in(&fault_hash->hash, key, NULL); - if (!r) - fault_hash->count++; + if (!root) + return false; - /* chash_table_copy_in should never fail unless we're losing count */ - WARN_ON_ONCE(r < 0); + r = amdgpu_bo_reserve(root, true); + if (r) + goto error_unref; -unlock_out: - spin_unlock_irqrestore(&fault_hash->lock, flags); - return r; -} + /* Double check that the VM still exists */ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm && vm->root.base.bo != root) + vm = NULL; + spin_unlock(&adev->vm_manager.pasid_lock); + if (!vm) + goto error_unlock; + + addr /= AMDGPU_GPU_PAGE_SIZE; + flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | + AMDGPU_PTE_SYSTEM; + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + /* Redirect the access to the dummy page */ + value = adev->dummy_page_addr; + flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | + AMDGPU_PTE_WRITEABLE; + } else { + /* Let the hw retry silently on the PTE */ + value = 0; + } -/** - * amdgpu_vm_clear_fault - Remove a page fault record - * - * @fault_hash: fault hash table - * @key: 64-bit encoding of PASID and address - * - * This should be called when a page fault has been handled. Any - * future interrupt with this key will be processed as a new - * page fault. 
- */ -void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) -{ - unsigned long flags; - int r; + r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, + flags, value, NULL, NULL); + if (r) + goto error_unlock; - if (!fault_hash) - return; + r = amdgpu_vm_update_pdes(adev, vm, true); - spin_lock_irqsave(&fault_hash->lock, flags); +error_unlock: + amdgpu_bo_unreserve(root); + if (r < 0) + DRM_ERROR("Can't handle page fault (%ld)\n", r); - r = chash_table_remove(&fault_hash->hash, key, NULL); - if (!WARN_ON_ONCE(r < 0)) { - fault_hash->count--; - WARN_ON_ONCE(fault_hash->count < 0); - } +error_unref: + amdgpu_bo_unref(&root); - spin_unlock_irqrestore(&fault_hash->lock, flags); + return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 81ff8177f092..4dbbe1b6b413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,7 +30,6 @@ #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> #include <drm/ttm/ttm_bo_driver.h> -#include <linux/chash.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -68,6 +67,8 @@ struct amdgpu_bo_list_entry; /* PDE is handled as PTE for VEGA10 */ #define AMDGPU_PDE_PTE (1ULL << 54) +#define AMDGPU_PTE_LOG (1ULL << 55) + /* PTE is handled as PDE for VEGA10 (Translate Further) */ #define AMDGPU_PTE_TF (1ULL << 56) @@ -76,8 +77,8 @@ struct amdgpu_bo_list_entry; /* For GFX9 */ -#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) -#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) +#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57) +#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL) #define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 @@ -87,17 +88,25 @@ struct amdgpu_bo_list_entry; | AMDGPU_PTE_EXECUTABLE \ | AMDGPU_PTE_READABLE \ | AMDGPU_PTE_WRITEABLE \ - | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC)) + | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) + +/* gfx10 */ +#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) +#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) /* How to programm VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 +/* Reserve 4MB VRAM for page tables */ +#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20) + /* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 2 -#define AMDGPU_GFXHUB 0 -#define AMDGPU_MMHUB 1 +#define AMDGPU_MAX_VMHUBS 3 +#define AMDGPU_GFXHUB_0 0 +#define AMDGPU_MMHUB_0 1 +#define AMDGPU_MMHUB_1 2 /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) @@ -140,7 +149,6 @@ struct amdgpu_vm_bo_base { struct amdgpu_vm_pt { struct amdgpu_vm_bo_base base; - bool huge; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; @@ -167,11 +175,6 @@ struct amdgpu_vm_pte_funcs { uint32_t incr, uint64_t flags); }; -#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) -#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) -#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) - - struct amdgpu_task_info { char process_name[TASK_COMM_LEN]; char task_name[TASK_COMM_LEN]; @@ -179,11 +182,57 @@ struct amdgpu_task_info { pid_t tgid; }; -#define AMDGPU_PAGEFAULT_HASH_BITS 8 -struct amdgpu_retryfault_hashtable { - DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spinlock_t lock; - int count; +/** + * struct amdgpu_vm_update_params + * + * Encapsulate some VM table update parameters to reduce + * the number 
of function parameters + * + */ +struct amdgpu_vm_update_params { + + /** + * @adev: amdgpu device we do this update for + */ + struct amdgpu_device *adev; + + /** + * @vm: optional amdgpu_vm we do this update for + */ + struct amdgpu_vm *vm; + + /** + * @direct: if changes should be made directly + */ + bool direct; + + /** + * @pages_addr: + * + * DMA addresses to use for mapping + */ + dma_addr_t *pages_addr; + + /** + * @job: job to used for hw submission + */ + struct amdgpu_job *job; + + /** + * @num_dw_left: number of dw left for the IB + */ + unsigned int num_dw_left; +}; + +struct amdgpu_vm_update_funcs { + int (*map_table)(struct amdgpu_bo *bo); + int (*prepare)(struct amdgpu_vm_update_params *p, void * owner, + struct dma_fence *exclusive); + int (*update)(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, uint64_t flags); + int (*commit)(struct amdgpu_vm_update_params *p, + struct dma_fence **fence); }; struct amdgpu_vm { @@ -213,15 +262,19 @@ struct amdgpu_vm { struct amdgpu_vm_pt root; struct dma_fence *last_update; - /* Scheduler entity for page table updates */ - struct drm_sched_entity entity; + /* Scheduler entities for page table updates */ + struct drm_sched_entity direct; + struct drm_sched_entity delayed; unsigned int pasid; /* dedicated to vm */ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ - bool use_cpu_for_update; + bool use_cpu_for_update; + + /* Functions to use for VM table updates */ + const struct amdgpu_vm_update_funcs *update_funcs; /* Flag to indicate ATS support from PTE for GFX9 */ bool pte_support_ats; @@ -245,7 +298,6 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* mark whether can do the bulk move */ bool bulk_moveable; - struct amdgpu_retryfault_hashtable *fault_hash; }; struct amdgpu_vm_manager { @@ -267,6 +319,7 @@ struct amdgpu_vm_manager { const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS]; unsigned vm_pte_num_rqs; + struct amdgpu_ring *page_fault; /* partial resident texture handling */ spinlock_t prt_lock; @@ -283,14 +336,23 @@ struct amdgpu_vm_manager { */ struct idr pasid_idr; spinlock_t pasid_lock; + + /* counter of mapped memory through xgmi */ + uint32_t xgmi_map_counter; + struct mutex lock_pstate; }; #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) +extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs; +extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; + void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); + +long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); @@ -303,12 +365,9 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void 
*param); -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - uint64_t saddr, uint64_t size); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); -int amdgpu_vm_update_directories(struct amdgpu_device *adev, - struct amdgpu_vm *vm); +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); @@ -319,6 +378,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, bool clear); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo); struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, @@ -353,16 +413,13 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info); +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); - -int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); - -void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); - void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c new file mode 100644 index 000000000000..73fec7a0ced5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -0,0 +1,127 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_vm.h" +#include "amdgpu_object.h" +#include "amdgpu_trace.h" + +/** + * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped + * + * @table: newly allocated or validated PD/PT + */ +static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table) +{ + return amdgpu_bo_kmap(table, NULL); +} + +/** + * amdgpu_vm_cpu_prepare - prepare page table update with the CPU + * + * @p: see amdgpu_vm_update_params definition + * @owner: owner we need to sync to + * @exclusive: exclusive move fence we need to sync to + * + * Returns: + * Negativ errno, 0 for success. 
+ */ +static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, + struct dma_fence *exclusive) +{ + int r; + + /* Wait for any BO move to be completed */ + if (exclusive) { + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + } + + /* Don't wait for submissions during page fault */ + if (p->direct) + return 0; + + /* Wait for PT BOs to be idle. PTs share the same resv. object + * as the root PD BO + */ + return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); +} + +/** + * amdgpu_vm_cpu_update - helper to update page tables via CPU + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + pe += (unsigned long)amdgpu_bo_kptr(bo); + + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); + + for (i = 0; i < count; i++) { + value = p->pages_addr ? + amdgpu_vm_map_gart(p->pages_addr, addr) : + addr; + amdgpu_gmc_set_pte_pde(p->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } + return 0; +} + +/** + * amdgpu_vm_cpu_commit - commit page table update to the HW + * + * @p: see amdgpu_vm_update_params definition + * @fence: unused + * + * Make sure that the hardware sees the page table updates. + */ +static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, + struct dma_fence **fence) +{ + /* Flush HDP */ + mb(); + amdgpu_asic_flush_hdp(p->adev, NULL); + return 0; +} + +const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = { + .map_table = amdgpu_vm_cpu_map_table, + .prepare = amdgpu_vm_cpu_prepare, + .update = amdgpu_vm_cpu_update, + .commit = amdgpu_vm_cpu_commit +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c new file mode 100644 index 000000000000..832db59f441e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -0,0 +1,272 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "amdgpu_vm.h" +#include "amdgpu_job.h" +#include "amdgpu_object.h" +#include "amdgpu_trace.h" + +#define AMDGPU_VM_SDMA_MIN_NUM_DW 256u +#define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u) + +/** + * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped + * + * @table: newly allocated or validated PD/PT + */ +static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table) +{ + int r; + + r = amdgpu_ttm_alloc_gart(&table->tbo); + if (r) + return r; + + if (table->shadow) + r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); + + return r; +} + +/** + * amdgpu_vm_sdma_prepare - prepare SDMA command submission + * + * @p: see amdgpu_vm_update_params definition + * @owner: owner we need to sync to + * @exclusive: exclusive move fence we need to sync to + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, + void *owner, struct dma_fence *exclusive) +{ + struct amdgpu_bo *root = p->vm->root.base.bo; + unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + int r; + + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); + if (r) + return r; + + p->num_dw_left = ndw; + + /* Wait for moves to be completed */ + r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); + if (r) + return r; + + /* Don't wait for any submissions during page fault handling */ + if (p->direct) + return 0; + + return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, + owner, false); +} + +/** + * amdgpu_vm_sdma_commit - commit SDMA command submission + * + * @p: see amdgpu_vm_update_params definition + * @fence: resulting fence + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, + struct dma_fence **fence) +{ + struct amdgpu_bo *root = p->vm->root.base.bo; + struct amdgpu_ib *ib = p->job->ibs; + struct drm_sched_entity *entity; + struct amdgpu_ring *ring; + struct dma_fence *f; + int r; + + entity = p->direct ? &p->vm->direct : &p->vm->delayed; + ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); + + WARN_ON(ib->length_dw == 0); + amdgpu_ring_pad_ib(ring, ib); + WARN_ON(ib->length_dw > p->num_dw_left); + r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); + if (r) + goto error; + + amdgpu_bo_fence(root, f, true); + if (fence && !p->direct) + swap(*fence, f); + dma_fence_put(f); + return 0; + +error: + amdgpu_job_free(p->job); + return r; +} + +/** + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @count: number of page entries to copy + * + * Traces the parameters and calls the DMA function to copy the PTEs. 
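+ * The PTE values themselves are expected at the end of the IB, where + * amdgpu_vm_sdma_update() has already placed them; only the copy command is + * emitted here.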
+ */ +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + unsigned count) +{ + struct amdgpu_ib *ib = p->job->ibs; + uint64_t src = ib->gpu_addr; + + src += p->num_dw_left * 4; + + pe += amdgpu_bo_gpu_offset(bo); + trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct); + + amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); +} + +/** + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Traces the parameters and calls the right asic functions + * to setup the page table using the DMA. + */ +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + struct amdgpu_ib *ib = p->job->ibs; + + pe += amdgpu_bo_gpu_offset(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); + if (count < 3) { + amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, + count, incr); + } else { + amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr, + count, incr, flags); + } +} + +/** + * amdgpu_vm_sdma_update - execute VM update + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Reserve space in the IB, setup mapping buffer on demand and write commands to + * the IB. + */ +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i, ndw, nptes; + uint64_t *pte; + int r; + + do { + ndw = p->num_dw_left; + ndw -= p->job->ibs->length_dw; + + if (ndw < 32) { + r = amdgpu_vm_sdma_commit(p, NULL); + if (r) + return r; + + /* estimate how many dw we need */ + ndw = 32; + if (p->pages_addr) + ndw += count * 2; + ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); + + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); + if (r) + return r; + + p->num_dw_left = ndw; + } + + if (!p->pages_addr) { + /* set page commands needed */ + if (bo->shadow) + amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr, + count, incr, flags); + amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, + incr, flags); + return 0; + } + + /* copy commands needed */ + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * + (bo->shadow ? 2 : 1); + + /* for padding */ + ndw -= 7; + + nptes = min(count, ndw / 2); + + /* Put the PTEs at the end of the IB. 
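+ * num_dw_left is reduced accordingly so that later commands do not + * overwrite them.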
*/ + p->num_dw_left -= nptes * 2; + pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]); + for (i = 0; i < nptes; ++i, addr += incr) { + pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr); + pte[i] |= flags; + } + + if (bo->shadow) + amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes); + amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); + + pe += nptes * 8; + count -= nptes; + } while (count); + + return 0; +} + +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = { + .map_table = amdgpu_vm_sdma_map_table, + .prepare = amdgpu_vm_sdma_prepare, + .update = amdgpu_vm_sdma_update, + .commit = amdgpu_vm_sdma_commit +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 3f9d5d00c9b3..82a3299e53c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -22,8 +22,10 @@ * Authors: Christian König */ -#include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_atomfirmware.h" +#include "atom.h" struct amdgpu_vram_mgr { struct drm_mm mm; @@ -33,6 +35,120 @@ struct amdgpu_vram_mgr { }; /** + * DOC: mem_info_vram_total + * + * The amdgpu driver provides a sysfs API for reporting current total VRAM + * available on the device + * The file mem_info_vram_total is used for this and returns the total + * amount of VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vram_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.real_vram_size); +} + +/** + * DOC: mem_info_vis_vram_total + * + * The amdgpu driver provides a sysfs API for reporting current total + * visible VRAM available on the device + * The file mem_info_vis_vram_total is used for this and returns the total + * amount of visible VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.visible_vram_size); +} + +/** + * DOC: mem_info_vram_used + * + * The amdgpu driver provides a sysfs API for reporting current total VRAM + * available on the device + * The file mem_info_vram_used is used for this and returns the total + * amount of currently used VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); +} + +/** + * DOC: mem_info_vis_vram_used + * + * The amdgpu driver provides a sysfs API for reporting current total of + * used visible VRAM + * The file mem_info_vis_vram_used is used for this and returns the total + * amount of currently used visible VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); +} + +static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct 
drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + switch (adev->gmc.vram_vendor) { + case SAMSUNG: + return snprintf(buf, PAGE_SIZE, "samsung\n"); + case INFINEON: + return snprintf(buf, PAGE_SIZE, "infineon\n"); + case ELPIDA: + return snprintf(buf, PAGE_SIZE, "elpida\n"); + case ETRON: + return snprintf(buf, PAGE_SIZE, "etron\n"); + case NANYA: + return snprintf(buf, PAGE_SIZE, "nanya\n"); + case HYNIX: + return snprintf(buf, PAGE_SIZE, "hynix\n"); + case MOSEL: + return snprintf(buf, PAGE_SIZE, "mosel\n"); + case WINBOND: + return snprintf(buf, PAGE_SIZE, "winbond\n"); + case ESMT: + return snprintf(buf, PAGE_SIZE, "esmt\n"); + case MICRON: + return snprintf(buf, PAGE_SIZE, "micron\n"); + default: + return snprintf(buf, PAGE_SIZE, "unknown\n"); + } +} + +static DEVICE_ATTR(mem_info_vram_total, S_IRUGO, + amdgpu_mem_info_vram_total_show, NULL); +static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO, + amdgpu_mem_info_vis_vram_total_show,NULL); +static DEVICE_ATTR(mem_info_vram_used, S_IRUGO, + amdgpu_mem_info_vram_used_show, NULL); +static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, + amdgpu_mem_info_vis_vram_used_show, NULL); +static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, + amdgpu_mem_info_vram_vendor, NULL); + +/** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM * * @man: TTM memory type manager @@ -43,7 +159,9 @@ struct amdgpu_vram_mgr { static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, unsigned long p_size) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr; + int ret; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) @@ -52,6 +170,34 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, drm_mm_init(&mgr->mm, 0, p_size); spin_lock_init(&mgr->lock); man->priv = mgr; + + /* Add the two VRAM-related sysfs files */ + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vis_vram_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_used\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); + return ret; + } + return 0; } @@ -65,6 +211,7 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, */ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; spin_lock(&mgr->lock); @@ -72,6 +219,11 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) spin_unlock(&mgr->lock); kfree(mgr); man->priv = NULL; + device_remove_file(adev->dev, &dev_attr_mem_info_vram_total); + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); + device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); + device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor); return 0; } @@ -167,7 +319,7 @@ static int 
amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t usage = 0, vis_usage = 0; + uint64_t vis_usage = 0, mem_bytes, max_bytes; unsigned i; int r; @@ -175,20 +327,38 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (!lpfn) lpfn = man->size; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS || - amdgpu_vram_page_split == -1) { + max_bytes = adev->gmc.mc_vram_size; + if (tbo->type != ttm_bo_type_kernel) + max_bytes -= AMDGPU_VM_RESERVED_VRAM; + + /* bail out quickly if there's likely not enough VRAM for this BO */ + mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; + if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { + atomic64_sub(mem_bytes, &mgr->usage); + mem->mm_node = NULL; + return 0; + } + + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { pages_per_node = ~0ul; num_nodes = 1; } else { - pages_per_node = max((uint32_t)amdgpu_vram_page_split, - mem->page_alignment); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pages_per_node = HPAGE_PMD_NR; +#else + /* default to 2MB */ + pages_per_node = (2UL << (20UL - PAGE_SHIFT)); +#endif + pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment); num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } - nodes = kvmalloc_array(num_nodes, sizeof(*nodes), + nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes), GFP_KERNEL | __GFP_ZERO); - if (!nodes) + if (!nodes) { + atomic64_sub(mem_bytes, &mgr->usage); return -ENOMEM; + } mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) @@ -208,7 +378,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) break; - usage += nodes[i].size << PAGE_SHIFT; vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; @@ -228,14 +397,12 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; - usage += nodes[i].size << PAGE_SHIFT; vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; } spin_unlock(&mgr->lock); - atomic64_add(usage, &mgr->usage); atomic64_add(vis_usage, &mgr->vis_usage); mem->mm_node = nodes; @@ -246,6 +413,7 @@ error: while (i--) drm_mm_remove_node(&nodes[i]); spin_unlock(&mgr->lock); + atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage); kvfree(nodes); return r == -ENOSPC ? 0 : r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 407dd16cc35c..61d13d8b7b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -24,7 +24,9 @@ #include <linux/list.h> #include "amdgpu.h" #include "amdgpu_xgmi.h" - +#include "amdgpu_smu.h" +#include "amdgpu_ras.h" +#include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -34,12 +36,194 @@ static DEFINE_MUTEX(xgmi_mutex); static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE]; static unsigned hive_count = 0; - void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) { return &hive->device_list; } +/** + * DOC: AMDGPU XGMI Support + * + * XGMI is a high speed interconnect that joins multiple GPU cards + * into a homogeneous memory space that is organized by a collective + * hive ID and individual node IDs, both of which are 64-bit numbers. 
+ * + * The file xgmi_device_id contains the unique per GPU device ID and + * is stored in the /sys/class/drm/card${cardno}/device/ directory. + * + * Inside the device directory a sub-directory 'xgmi_hive_info' is + * created which contains the hive ID and the list of nodes. + * + * The hive ID is stored in: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id + * + * The node information is stored in numbered directories: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id + * + * Each device has its own xgmi_hive_info directory with a mirror + * set of node sub-directories. + * + * The XGMI memory space is built by contiguously adding the power of + * two padded VRAM space from each node to each other. + * + */ + + +static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_hive_info *hive = + container_of(attr, struct amdgpu_hive_info, dev_attr); + + return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id); +} + +static int amdgpu_xgmi_sysfs_create(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + int ret = 0; + + if (WARN_ON(hive->kobj)) + return -EINVAL; + + hive->kobj = kobject_create_and_add("xgmi_hive_info", &adev->dev->kobj); + if (!hive->kobj) { + dev_err(adev->dev, "XGMI: Failed to allocate sysfs entry!\n"); + return -EINVAL; + } + + hive->dev_attr = (struct device_attribute) { + .attr = { + .name = "xgmi_hive_id", + .mode = S_IRUGO, + + }, + .show = amdgpu_xgmi_show_hive_id, + }; + + ret = sysfs_create_file(hive->kobj, &hive->dev_attr.attr); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create device file xgmi_hive_id\n"); + kobject_del(hive->kobj); + kobject_put(hive->kobj); + hive->kobj = NULL; + } + + return ret; +} + +static void amdgpu_xgmi_sysfs_destroy(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + sysfs_remove_file(hive->kobj, &hive->dev_attr.attr); + kobject_del(hive->kobj); + kobject_put(hive->kobj); + hive->kobj = NULL; +} + +static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.xgmi.node_id); + +} + +#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) +static ssize_t amdgpu_xgmi_show_error(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in; + uint64_t fica_out; + unsigned int error_count = 0; + + ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); + ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); + + fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + if (fica_out != 0x1f) + pr_err("xGMI error counters not enabled!\n"); + + fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + + if ((fica_out & 0xffff) == 2) + error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); + + adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + + return snprintf(buf, PAGE_SIZE, "%d\n", error_count); +} + + +static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); + +static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + int ret = 0; + char node[10] = { 0 }; + + /* Create xgmi device id 
file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n"); + return ret; + } + + /* Create xgmi error file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_error); + if (ret) + pr_err("failed to create xgmi_error\n"); + + + /* Create sysfs link to hive info folder on the first device */ + if (adev != hive->adev) { + ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, + "xgmi_hive_info"); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create link to hive info"); + goto remove_file; + } + } + + sprintf(node, "node%d", hive->number_devices); + /* Create sysfs link from the hive folder to yourself */ + ret = sysfs_create_link(hive->kobj, &adev->dev->kobj, node); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create link from hive info"); + goto remove_link; + } + + goto success; + + +remove_link: + sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique); + +remove_file: + device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + +success: + return ret; +} + +static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique); + sysfs_remove_link(hive->kobj, adev->ddev->unique); +} + + + struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock) { int i; @@ -66,18 +250,83 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo /* initialize new hive if not exist */ tmp = &xgmi_hives[hive_count++]; + + if (amdgpu_xgmi_sysfs_create(adev, tmp)) { + mutex_unlock(&xgmi_mutex); + return NULL; + } + + tmp->adev = adev; tmp->hive_id = adev->gmc.xgmi.hive_id; INIT_LIST_HEAD(&tmp->device_list); mutex_init(&tmp->hive_lock); mutex_init(&tmp->reset_lock); + if (lock) mutex_lock(&tmp->hive_lock); - + tmp->pstate = -1; mutex_unlock(&xgmi_mutex); return tmp; } +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) +{ + int ret = 0; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_device *tmp_adev; + bool update_hive_pstate = true; + bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; + + if (!hive) + return 0; + + mutex_lock(&hive->hive_lock); + + if (hive->pstate == pstate) { + adev->pstate = is_high_pstate ? 
pstate : adev->pstate; + goto out; + } + + dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); + + if (is_support_sw_smu_xgmi(adev)) + ret = smu_set_xgmi_pstate(&adev->smu, pstate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + if (ret) { + dev_err(adev->dev, + "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + goto out; + } + + /* Update device pstate */ + adev->pstate = pstate; + + /* + * Update the hive pstate only when all devices of the hive + * are in the same pstate + */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (tmp_adev->pstate != adev->pstate) { + update_hive_pstate = false; + break; + } + } + if (update_hive_pstate || is_high_pstate) + hive->pstate = pstate; + +out: + mutex_unlock(&hive->hive_lock); + + return ret; +} + int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) { int ret = -EINVAL; @@ -85,7 +334,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev /* Each psp need to set the latest topology */ ret = psp_xgmi_set_topology_info(&adev->psp, hive->number_devices, - &hive->topology_info); + &adev->psp.xgmi_context.top_info); if (ret) dev_err(adev->dev, "XGMI: Set topology failure on device %llx, hive %llx, ret %d", @@ -95,30 +344,48 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev return ret; } + +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_hops; + return -EINVAL; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { - struct psp_xgmi_topology_info *hive_topology; + struct psp_xgmi_topology_info *top_info; struct amdgpu_hive_info *hive; struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; - int count = 0, ret = -EINVAL; + int count = 0, ret = 0; if (!adev->gmc.xgmi.supported) return 0; - ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get node id\n"); - return ret; - } + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get hive id\n"); + return ret; + } - ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get hive id\n"); - return ret; + ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get node id\n"); + return ret; + } + } else { + adev->gmc.xgmi.hive_id = 16; + adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16; } hive = amdgpu_get_xgmi_hive(adev, 1); @@ -130,37 +397,60 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } - hive_topology = &hive->topology_info; + /* Set default device pstate */ + adev->pstate = -1; + + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_for_each_entry(entry, &hive->device_list, head) - hive_topology->nodes[count++].node_id = entry->node_id; + top_info->nodes[count++].node_id = entry->node_id; + 
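/*
 * [Editor's illustration, not part of this patch] A minimal userspace sketch
 * of reading the sysfs files this series adds: the mem_info_* files from
 * amdgpu_vram_mgr.c and the per-device xgmi_device_id file created by
 * amdgpu_xgmi_sysfs_add_dev_info() above.  "card0" and the helper name are
 * assumptions for illustration; the xgmi files exist only on XGMI-capable
 * boards.  Each file returns a plain decimal string.
 */
#include <stdio.h>

static void example_read_sysfs(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	/* Print "path: value" when the attribute exists and is readable. */
	if (f && fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	example_read_sysfs("/sys/class/drm/card0/device/mem_info_vram_total");
	example_read_sysfs("/sys/class/drm/card0/device/mem_info_vram_used");
	example_read_sysfs("/sys/class/drm/card0/device/xgmi_device_id");
	return 0;
}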
top_info->num_nodes = count; hive->number_devices = count; - /* Each psp need to get the latest topology */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); - if (ret) { - dev_err(tmp_adev->dev, - "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ - break; + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 1].node_id = + adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; } - } - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - break; + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit; + } + } } - dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", - adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); + if (!ret) + ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); + mutex_unlock(&hive->hive_lock); exit: + if (!ret) + dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", + adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); + else + dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n", + adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, + ret); + return ret; } @@ -176,9 +466,60 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return; if (!(hive->number_devices--)) { + amdgpu_xgmi_sysfs_destroy(adev, hive); mutex_destroy(&hive->hive_lock); mutex_destroy(&hive->reset_lock); } else { + amdgpu_xgmi_sysfs_rem_dev_info(adev, hive); mutex_unlock(&hive->hive_lock); } } + +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "xgmi_wafl_err_count", + .debugfs_name = "xgmi_wafl_err_inject", + }; + + if (!adev->gmc.xgmi.supported || + adev->gmc.xgmi.num_physical_nodes == 0) + return 0; + + if (!adev->gmc.xgmi.ras_if) { + adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gmc.xgmi.ras_if) + return -ENOMEM; + adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; + adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gmc.xgmi.ras_if->sub_block_index = 0; + strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); + } + ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; + r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { + kfree(adev->gmc.xgmi.ras_if); + adev->gmc.xgmi.ras_if = NULL; + } + + return r; +} + +void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, 
AMDGPU_RAS_BLOCK__XGMI_WAFL) && + adev->gmc.xgmi.ras_if) { + struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 14bc60664159..bbf504ff7051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -27,15 +27,30 @@ struct amdgpu_hive_info { uint64_t hive_id; struct list_head device_list; - struct psp_xgmi_topology_info topology_info; int number_devices; - struct mutex hive_lock, - reset_lock; + struct mutex hive_lock, reset_lock; + struct kobject *kobj; + struct device_attribute dev_attr; + struct amdgpu_device *adev; + int pstate; /*0 -- low , 1 -- high , -1 unknown*/ }; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev); +void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev); + +static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev) +{ + return (adev != bo_adev && + adev->gmc.xgmi.hive_id && + adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c new file mode 100644 index 000000000000..fda99c958c3b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c @@ -0,0 +1,60 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
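/*
 * [Editor's illustration, not part of this patch] A sketch of how a
 * driver-internal caller might combine the topology helpers declared in
 * amdgpu_xgmi.h above: amdgpu_xgmi_same_hive() and
 * amdgpu_xgmi_get_hops_count().  The function name and the "one hop" policy
 * are hypothetical; the point is the return-value convention
 * (get_hops_count() returns a hop count, or -EINVAL when the peer is not
 * found in the topology).
 */
static bool example_xgmi_peer_is_one_hop(struct amdgpu_device *adev,
					 struct amdgpu_device *peer_adev)
{
	int hops;

	/* Different hive (or the same device): no XGMI path to consider. */
	if (!amdgpu_xgmi_same_hive(adev, peer_adev))
		return false;

	hops = amdgpu_xgmi_get_hops_count(adev, peer_adev);
	return hops >= 0 && hops <= 1;
}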
+ * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "arct_ip_offset.h" + +int arct_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the block needed by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i])); + adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i])); + adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i])); + adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i])); + adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c new file mode 100644 index 000000000000..d9cc746af5e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -0,0 +1,103 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "athub_v1_0.h" + +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + case CHIP_RAVEN: + athub_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h new file mode 100644 index 000000000000..b279af59e34f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __ATHUB_V1_0_H__ +#define __ATHUB_V1_0_H__ + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c new file mode 100644 index 000000000000..ceb9aa4df0e7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -0,0 +1,103 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "athub_v2_0.h" + +#include "athub/athub_2_0_0_offset.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +static void +athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void +athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + athub_v2_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_v2_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + break; + default: + break; + } + + return 0; +} + +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h new file mode 100644 index 000000000000..02932c1c8bab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __ATHUB_V2_0_H__ +#define __ATHUB_V2_0_H__ + +int athub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index a39170991afe..4205bbe5d8d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -26,7 +26,8 @@ #define ATOM_H #include <linux/types.h> -#include <drm/drmP.h> + +struct drm_device; #define ATOM_BIOS_MAGIC 0xAA55 #define ATOM_ATI_MAGIC_PTR 0x30 diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 8a0818b23ea4..213e62a28ba0 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -23,7 +23,7 @@ * Authors: Dave Airlie * Alex Deucher */ -#include <drm/drmP.h> + #include <drm/drm_crtc_helper.h> #include <drm/amdgpu_drm.h> #include <drm/drm_fixed.h> diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index f81068ba4cc6..6858cde9fc5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -24,7 +24,7 @@ * Alex Deucher * Jerome Glisse */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 60e2447e12c5..1e94a9b652f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -23,7 +23,9 @@ * Authors: Dave Airlie * Alex Deucher */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include <drm/drm_crtc_helper.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index f9b2ce9a98f3..980c363b1a0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher * */ -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atom.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07c1f239e9c3..2d64d270725d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -24,7 +24,8 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -965,6 +966,25 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { {mmGRBM_STATUS}, + {mmGRBM_STATUS2}, + {mmGRBM_STATUS_SE0}, + {mmGRBM_STATUS_SE1}, + {mmGRBM_STATUS_SE2}, + {mmGRBM_STATUS_SE3}, + {mmSRBM_STATUS}, + {mmSRBM_STATUS2}, + {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET}, + {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET}, + {mmCP_STAT}, + {mmCP_STALLED_STAT1}, + {mmCP_STALLED_STAT2}, + {mmCP_STALLED_STAT3}, + {mmCP_CPF_BUSY_STAT}, + {mmCP_CPF_STALLED_STAT1}, + {mmCP_CPF_STATUS}, + {mmCP_CPC_BUSY_STAT}, + {mmCP_CPC_STALLED_STAT1}, + {mmCP_CPC_STATUS}, {mmGB_ADDR_CONFIG}, {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, @@ -1269,15 +1289,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) } /** - * cik_asic_reset - soft reset GPU + * cik_asic_pci_config_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * 
Look up which blocks are hung and attempt - * to reset them. + * Use PCI Config method to reset the GPU. + * * Returns 0 for success. */ -static int cik_asic_reset(struct amdgpu_device *adev) +static int cik_asic_pci_config_reset(struct amdgpu_device *adev) { int r; @@ -1290,6 +1310,50 @@ static int cik_asic_reset(struct amdgpu_device *adev) return r; } +static enum amd_reset_method +cik_asic_reset_method(struct amdgpu_device *adev) +{ + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + /* disable baco reset until it works */ + /* smu7_asic_get_baco_capability(adev, &baco_reset); */ + baco_reset = false; + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_LEGACY; +} + +/** + * cik_asic_reset - soft reset GPU + * + * @adev: amdgpu_device pointer + * + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. + */ +static int cik_asic_reset(struct amdgpu_device *adev) +{ + int r; + + if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + r = smu7_asic_baco_reset(adev); + else + r = cik_asic_pci_config_reset(adev); + + return r; +} + static u32 cik_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1804,12 +1868,25 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, .read_bios_from_rom = &cik_read_bios_from_rom, .read_register = &cik_read_register, .reset = &cik_asic_reset, + .reset_method = &cik_asic_reset_method, .set_vga_state = &cik_vga_set_state, .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, @@ -1821,6 +1898,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &cik_get_pcie_usage, .need_reset_on_init = &cik_need_reset_on_init, + .get_pcie_replay_count = &cik_get_pcie_replay_count, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 54c625a2e570..9870bf27870e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,4 +31,7 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); +int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); +int smu7_asic_baco_reset(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 721c757156e8..401c99f0b2d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 189599b694e8..c45304f1047c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -21,8 +21,10 @@ * * Authors: Alex Deucher */ + #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -640,7 +642,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -977,8 +979,8 @@ static int cik_sdma_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1114,7 +1116,7 @@ static int cik_sdma_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); @@ -1130,7 +1132,7 @@ static int cik_sdma_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h new file mode 100644 index 000000000000..27a2ff0a07d2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h @@ -0,0 +1,975 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +static const unsigned int gfx10_SECT_CONTEXT_def_1[] = { + 0x00000000, // DB_RENDER_CONTROL + 0x00000000, // DB_COUNT_CONTROL + 0x00000000, // DB_DEPTH_VIEW + 0x00000000, // DB_RENDER_OVERRIDE + 0x00000000, // DB_RENDER_OVERRIDE2 + 0x00000000, // DB_HTILE_DATA_BASE + 0x00000000, // HOLE + 0x00000000, // DB_DEPTH_SIZE_XY + 0x00000000, // DB_DEPTH_BOUNDS_MIN + 0x00000000, // DB_DEPTH_BOUNDS_MAX + 0x00000000, // DB_STENCIL_CLEAR + 0x00000000, // DB_DEPTH_CLEAR + 0x00000000, // PA_SC_SCREEN_SCISSOR_TL + 0x40004000, // PA_SC_SCREEN_SCISSOR_BR + 0x00000000, // DB_DFSM_CONTROL + 0x00000000, // DB_DEPTH_INFO + 0x00000000, // DB_Z_INFO + 0x00000000, // DB_STENCIL_INFO + 0x00000000, // DB_Z_READ_BASE + 0x00000000, // DB_STENCIL_READ_BASE + 0x00000000, // DB_Z_WRITE_BASE + 0x00000000, // DB_STENCIL_WRITE_BASE + 0x00000000, // DB_DEPTH_SIZE + 0x00000000, // DB_DEPTH_SLICE + 0x00000000, // DB_Z_INFO2 + 0x00000000, // DB_STENCIL_INFO2 + 0x00000000, // DB_Z_READ_BASE_HI + 0x00000000, // DB_STENCIL_READ_BASE_HI + 0x00000000, // DB_Z_WRITE_BASE_HI + 0x00000000, // DB_STENCIL_WRITE_BASE_HI + 0x00000000, // DB_HTILE_DATA_BASE_HI + 0x00150055, // DB_RMI_L2_CACHE_CONTROL + 0x00000000, // TA_BC_BASE_ADDR + 0x00000000, // TA_BC_BASE_ADDR_HI + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // COHER_DEST_BASE_HI_0 + 0x00000000, // COHER_DEST_BASE_HI_1 + 0x00000000, // COHER_DEST_BASE_HI_2 + 0x00000000, // COHER_DEST_BASE_HI_3 + 0x00000000, // COHER_DEST_BASE_2 + 0x00000000, // COHER_DEST_BASE_3 + 0x00000000, // PA_SC_WINDOW_OFFSET + 0x80000000, // PA_SC_WINDOW_SCISSOR_TL + 0x40004000, // PA_SC_WINDOW_SCISSOR_BR + 0x0000ffff, // PA_SC_CLIPRECT_RULE + 0x00000000, // PA_SC_CLIPRECT_0_TL + 0x40004000, // PA_SC_CLIPRECT_0_BR + 0x00000000, // PA_SC_CLIPRECT_1_TL + 0x40004000, // PA_SC_CLIPRECT_1_BR + 0x00000000, // PA_SC_CLIPRECT_2_TL + 0x40004000, // PA_SC_CLIPRECT_2_BR + 0x00000000, // PA_SC_CLIPRECT_3_TL + 0x40004000, // PA_SC_CLIPRECT_3_BR + 0xaa99aaaa, // PA_SC_EDGERULE + 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET + 0xffffffff, // CB_TARGET_MASK + 0xffffffff, // CB_SHADER_MASK + 0x80000000, // PA_SC_GENERIC_SCISSOR_TL + 0x40004000, // PA_SC_GENERIC_SCISSOR_BR + 0x00000000, // COHER_DEST_BASE_0 + 0x00000000, // COHER_DEST_BASE_1 + 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR + 0x80000000, // 
PA_SC_VPORT_SCISSOR_2_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR + 0x00000000, // PA_SC_VPORT_ZMIN_0 + 0x3f800000, // PA_SC_VPORT_ZMAX_0 + 0x00000000, // PA_SC_VPORT_ZMIN_1 + 0x3f800000, // PA_SC_VPORT_ZMAX_1 + 0x00000000, // PA_SC_VPORT_ZMIN_2 + 0x3f800000, // PA_SC_VPORT_ZMAX_2 + 0x00000000, // PA_SC_VPORT_ZMIN_3 + 0x3f800000, // PA_SC_VPORT_ZMAX_3 + 0x00000000, // PA_SC_VPORT_ZMIN_4 + 0x3f800000, // PA_SC_VPORT_ZMAX_4 + 0x00000000, // PA_SC_VPORT_ZMIN_5 + 0x3f800000, // PA_SC_VPORT_ZMAX_5 + 0x00000000, // PA_SC_VPORT_ZMIN_6 + 0x3f800000, // PA_SC_VPORT_ZMAX_6 + 0x00000000, // PA_SC_VPORT_ZMIN_7 + 0x3f800000, // PA_SC_VPORT_ZMAX_7 + 0x00000000, // PA_SC_VPORT_ZMIN_8 + 0x3f800000, // PA_SC_VPORT_ZMAX_8 + 0x00000000, // PA_SC_VPORT_ZMIN_9 + 0x3f800000, // PA_SC_VPORT_ZMAX_9 + 0x00000000, // PA_SC_VPORT_ZMIN_10 + 0x3f800000, // PA_SC_VPORT_ZMAX_10 + 0x00000000, // PA_SC_VPORT_ZMIN_11 + 0x3f800000, // PA_SC_VPORT_ZMAX_11 + 0x00000000, // PA_SC_VPORT_ZMIN_12 + 0x3f800000, // PA_SC_VPORT_ZMAX_12 + 0x00000000, // PA_SC_VPORT_ZMIN_13 + 0x3f800000, // PA_SC_VPORT_ZMAX_13 + 0x00000000, // PA_SC_VPORT_ZMIN_14 + 0x3f800000, // PA_SC_VPORT_ZMAX_14 + 0x00000000, // PA_SC_VPORT_ZMIN_15 + 0x3f800000, // PA_SC_VPORT_ZMAX_15 + 0x00000000, // PA_SC_RASTER_CONFIG + 0x00000000, // PA_SC_RASTER_CONFIG_1 + 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL +}; +static const unsigned int gfx10_SECT_CONTEXT_def_2[] = { + 0x00000000, // CP_PERFMON_CNTX_CNTL + 0x00000000, // CP_RINGID + 0x00000000, // CP_VMID + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SC_RIGHT_VERT_GRID + 0x00000000, // PA_SC_LEFT_VERT_GRID + 0x00000000, // PA_SC_HORIZ_GRID + 0x00000000, // HOLE + 0x00000000, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0xffffffff, // VGT_MAX_VTX_INDX + 0x00000000, // VGT_MIN_VTX_INDX + 0x00000000, // VGT_INDX_OFFSET + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX + 0x00550055, // CB_RMI_GL2_CACHE_CONTROL + 0x00000000, // CB_BLEND_RED + 0x00000000, // CB_BLEND_GREEN + 0x00000000, // CB_BLEND_BLUE + 0x00000000, // CB_BLEND_ALPHA + 0x00000000, // CB_DCC_CONTROL + 0x00000000, // 
CB_COVERAGE_OUT_CONTROL + 0x00000000, // DB_STENCIL_CONTROL + 0x01000000, // DB_STENCILREFMASK + 0x01000000, // DB_STENCILREFMASK_BF + 0, // HOLE + 0x00000000, // PA_CL_VPORT_XSCALE + 0x00000000, // PA_CL_VPORT_XOFFSET + 0x00000000, // PA_CL_VPORT_YSCALE + 0x00000000, // PA_CL_VPORT_YOFFSET + 0x00000000, // PA_CL_VPORT_ZSCALE + 0x00000000, // PA_CL_VPORT_ZOFFSET + 0x00000000, // PA_CL_VPORT_XSCALE_1 + 0x00000000, // PA_CL_VPORT_XOFFSET_1 + 0x00000000, // PA_CL_VPORT_YSCALE_1 + 0x00000000, // PA_CL_VPORT_YOFFSET_1 + 0x00000000, // PA_CL_VPORT_ZSCALE_1 + 0x00000000, // PA_CL_VPORT_ZOFFSET_1 + 0x00000000, // PA_CL_VPORT_XSCALE_2 + 0x00000000, // PA_CL_VPORT_XOFFSET_2 + 0x00000000, // PA_CL_VPORT_YSCALE_2 + 0x00000000, // PA_CL_VPORT_YOFFSET_2 + 0x00000000, // PA_CL_VPORT_ZSCALE_2 + 0x00000000, // PA_CL_VPORT_ZOFFSET_2 + 0x00000000, // PA_CL_VPORT_XSCALE_3 + 0x00000000, // PA_CL_VPORT_XOFFSET_3 + 0x00000000, // PA_CL_VPORT_YSCALE_3 + 0x00000000, // PA_CL_VPORT_YOFFSET_3 + 0x00000000, // PA_CL_VPORT_ZSCALE_3 + 0x00000000, // PA_CL_VPORT_ZOFFSET_3 + 0x00000000, // PA_CL_VPORT_XSCALE_4 + 0x00000000, // PA_CL_VPORT_XOFFSET_4 + 0x00000000, // PA_CL_VPORT_YSCALE_4 + 0x00000000, // PA_CL_VPORT_YOFFSET_4 + 0x00000000, // PA_CL_VPORT_ZSCALE_4 + 0x00000000, // PA_CL_VPORT_ZOFFSET_4 + 0x00000000, // PA_CL_VPORT_XSCALE_5 + 0x00000000, // PA_CL_VPORT_XOFFSET_5 + 0x00000000, // PA_CL_VPORT_YSCALE_5 + 0x00000000, // PA_CL_VPORT_YOFFSET_5 + 0x00000000, // PA_CL_VPORT_ZSCALE_5 + 0x00000000, // PA_CL_VPORT_ZOFFSET_5 + 0x00000000, // PA_CL_VPORT_XSCALE_6 + 0x00000000, // PA_CL_VPORT_XOFFSET_6 + 0x00000000, // PA_CL_VPORT_YSCALE_6 + 0x00000000, // PA_CL_VPORT_YOFFSET_6 + 0x00000000, // PA_CL_VPORT_ZSCALE_6 + 0x00000000, // PA_CL_VPORT_ZOFFSET_6 + 0x00000000, // PA_CL_VPORT_XSCALE_7 + 0x00000000, // PA_CL_VPORT_XOFFSET_7 + 0x00000000, // PA_CL_VPORT_YSCALE_7 + 0x00000000, // PA_CL_VPORT_YOFFSET_7 + 0x00000000, // PA_CL_VPORT_ZSCALE_7 + 0x00000000, // PA_CL_VPORT_ZOFFSET_7 + 0x00000000, // PA_CL_VPORT_XSCALE_8 + 0x00000000, // PA_CL_VPORT_XOFFSET_8 + 0x00000000, // PA_CL_VPORT_YSCALE_8 + 0x00000000, // PA_CL_VPORT_YOFFSET_8 + 0x00000000, // PA_CL_VPORT_ZSCALE_8 + 0x00000000, // PA_CL_VPORT_ZOFFSET_8 + 0x00000000, // PA_CL_VPORT_XSCALE_9 + 0x00000000, // PA_CL_VPORT_XOFFSET_9 + 0x00000000, // PA_CL_VPORT_YSCALE_9 + 0x00000000, // PA_CL_VPORT_YOFFSET_9 + 0x00000000, // PA_CL_VPORT_ZSCALE_9 + 0x00000000, // PA_CL_VPORT_ZOFFSET_9 + 0x00000000, // PA_CL_VPORT_XSCALE_10 + 0x00000000, // PA_CL_VPORT_XOFFSET_10 + 0x00000000, // PA_CL_VPORT_YSCALE_10 + 0x00000000, // PA_CL_VPORT_YOFFSET_10 + 0x00000000, // PA_CL_VPORT_ZSCALE_10 + 0x00000000, // PA_CL_VPORT_ZOFFSET_10 + 0x00000000, // PA_CL_VPORT_XSCALE_11 + 0x00000000, // PA_CL_VPORT_XOFFSET_11 + 0x00000000, // PA_CL_VPORT_YSCALE_11 + 0x00000000, // PA_CL_VPORT_YOFFSET_11 + 0x00000000, // PA_CL_VPORT_ZSCALE_11 + 0x00000000, // PA_CL_VPORT_ZOFFSET_11 + 0x00000000, // PA_CL_VPORT_XSCALE_12 + 0x00000000, // PA_CL_VPORT_XOFFSET_12 + 0x00000000, // PA_CL_VPORT_YSCALE_12 + 0x00000000, // PA_CL_VPORT_YOFFSET_12 + 0x00000000, // PA_CL_VPORT_ZSCALE_12 + 0x00000000, // PA_CL_VPORT_ZOFFSET_12 + 0x00000000, // PA_CL_VPORT_XSCALE_13 + 0x00000000, // PA_CL_VPORT_XOFFSET_13 + 0x00000000, // PA_CL_VPORT_YSCALE_13 + 0x00000000, // PA_CL_VPORT_YOFFSET_13 + 0x00000000, // PA_CL_VPORT_ZSCALE_13 + 0x00000000, // PA_CL_VPORT_ZOFFSET_13 + 0x00000000, // PA_CL_VPORT_XSCALE_14 + 0x00000000, // PA_CL_VPORT_XOFFSET_14 + 0x00000000, // PA_CL_VPORT_YSCALE_14 + 0x00000000, // PA_CL_VPORT_YOFFSET_14 + 
0x00000000, // PA_CL_VPORT_ZSCALE_14 + 0x00000000, // PA_CL_VPORT_ZOFFSET_14 + 0x00000000, // PA_CL_VPORT_XSCALE_15 + 0x00000000, // PA_CL_VPORT_XOFFSET_15 + 0x00000000, // PA_CL_VPORT_YSCALE_15 + 0x00000000, // PA_CL_VPORT_YOFFSET_15 + 0x00000000, // PA_CL_VPORT_ZSCALE_15 + 0x00000000, // PA_CL_VPORT_ZOFFSET_15 + 0x00000000, // PA_CL_UCP_0_X + 0x00000000, // PA_CL_UCP_0_Y + 0x00000000, // PA_CL_UCP_0_Z + 0x00000000, // PA_CL_UCP_0_W + 0x00000000, // PA_CL_UCP_1_X + 0x00000000, // PA_CL_UCP_1_Y + 0x00000000, // PA_CL_UCP_1_Z + 0x00000000, // PA_CL_UCP_1_W + 0x00000000, // PA_CL_UCP_2_X + 0x00000000, // PA_CL_UCP_2_Y + 0x00000000, // PA_CL_UCP_2_Z + 0x00000000, // PA_CL_UCP_2_W + 0x00000000, // PA_CL_UCP_3_X + 0x00000000, // PA_CL_UCP_3_Y + 0x00000000, // PA_CL_UCP_3_Z + 0x00000000, // PA_CL_UCP_3_W + 0x00000000, // PA_CL_UCP_4_X + 0x00000000, // PA_CL_UCP_4_Y + 0x00000000, // PA_CL_UCP_4_Z + 0x00000000, // PA_CL_UCP_4_W + 0x00000000, // PA_CL_UCP_5_X + 0x00000000, // PA_CL_UCP_5_Y + 0x00000000, // PA_CL_UCP_5_Z + 0x00000000, // PA_CL_UCP_5_W + 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_CNTL_0 + 0x00000000, // SPI_PS_INPUT_CNTL_1 + 0x00000000, // SPI_PS_INPUT_CNTL_2 + 0x00000000, // SPI_PS_INPUT_CNTL_3 + 0x00000000, // SPI_PS_INPUT_CNTL_4 + 0x00000000, // SPI_PS_INPUT_CNTL_5 + 0x00000000, // SPI_PS_INPUT_CNTL_6 + 0x00000000, // SPI_PS_INPUT_CNTL_7 + 0x00000000, // SPI_PS_INPUT_CNTL_8 + 0x00000000, // SPI_PS_INPUT_CNTL_9 + 0x00000000, // SPI_PS_INPUT_CNTL_10 + 0x00000000, // SPI_PS_INPUT_CNTL_11 + 0x00000000, // SPI_PS_INPUT_CNTL_12 + 0x00000000, // SPI_PS_INPUT_CNTL_13 + 0x00000000, // SPI_PS_INPUT_CNTL_14 + 0x00000000, // SPI_PS_INPUT_CNTL_15 + 0x00000000, // SPI_PS_INPUT_CNTL_16 + 0x00000000, // SPI_PS_INPUT_CNTL_17 + 0x00000000, // SPI_PS_INPUT_CNTL_18 + 0x00000000, // SPI_PS_INPUT_CNTL_19 + 0x00000000, // SPI_PS_INPUT_CNTL_20 + 0x00000000, // SPI_PS_INPUT_CNTL_21 + 0x00000000, // SPI_PS_INPUT_CNTL_22 + 0x00000000, // SPI_PS_INPUT_CNTL_23 + 0x00000000, // SPI_PS_INPUT_CNTL_24 + 0x00000000, // SPI_PS_INPUT_CNTL_25 + 0x00000000, // SPI_PS_INPUT_CNTL_26 + 0x00000000, // SPI_PS_INPUT_CNTL_27 + 0x00000000, // SPI_PS_INPUT_CNTL_28 + 0x00000000, // SPI_PS_INPUT_CNTL_29 + 0x00000000, // SPI_PS_INPUT_CNTL_30 + 0x00000000, // SPI_PS_INPUT_CNTL_31 + 0x00000000, // SPI_VS_OUT_CONFIG + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_ENA + 0x00000000, // SPI_PS_INPUT_ADDR + 0x00000000, // SPI_INTERP_CONTROL_0 + 0x00000002, // SPI_PS_IN_CONTROL + 0, // HOLE + 0x00000000, // SPI_BARYC_CNTL + 0, // HOLE + 0x00000000, // SPI_TMPRING_SIZE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_SHADER_IDX_FORMAT + 0x00000000, // SPI_SHADER_POS_FORMAT + 0x00000000, // SPI_SHADER_Z_FORMAT + 0x00000000, // SPI_SHADER_COL_FORMAT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SX_PS_DOWNCONVERT + 0x00000000, // SX_BLEND_OPT_EPSILON + 0x00000000, // SX_BLEND_OPT_CONTROL + 0x00000000, // SX_MRT0_BLEND_OPT + 0x00000000, // SX_MRT1_BLEND_OPT + 0x00000000, // SX_MRT2_BLEND_OPT + 0x00000000, // SX_MRT3_BLEND_OPT + 0x00000000, // SX_MRT4_BLEND_OPT + 0x00000000, // SX_MRT5_BLEND_OPT + 0x00000000, // SX_MRT6_BLEND_OPT + 0x00000000, // SX_MRT7_BLEND_OPT + 0x00000000, // 
CB_BLEND0_CONTROL + 0x00000000, // CB_BLEND1_CONTROL + 0x00000000, // CB_BLEND2_CONTROL + 0x00000000, // CB_BLEND3_CONTROL + 0x00000000, // CB_BLEND4_CONTROL + 0x00000000, // CB_BLEND5_CONTROL + 0x00000000, // CB_BLEND6_CONTROL + 0x00000000, // CB_BLEND7_CONTROL +}; +static const unsigned int gfx10_SECT_CONTEXT_def_3[] = { + 0x00000000, // PA_CL_POINT_X_RAD + 0x00000000, // PA_CL_POINT_Y_RAD + 0x00000000, // PA_CL_POINT_SIZE + 0x00000000, // PA_CL_POINT_CULL_RAD +}; +static const unsigned int gfx10_SECT_CONTEXT_def_4[] = { + 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP + 0x00000000, // DB_DEPTH_CONTROL + 0x00000000, // DB_EQAA + 0x00000000, // CB_COLOR_CONTROL + 0x00000000, // DB_SHADER_CONTROL + 0x00090000, // PA_CL_CLIP_CNTL + 0x00000004, // PA_SU_SC_MODE_CNTL + 0x00000000, // PA_CL_VTE_CNTL + 0x00000000, // PA_CL_VS_OUT_CNTL + 0x00000000, // PA_CL_NANINF_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_SCALE + 0x00000000, // PA_SU_PRIM_FILTER_CNTL + 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL + 0x00000000, // PA_CL_OBJPRIM_ID_CNTL + 0x00000000, // PA_CL_NGG_CNTL + 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SU_POINT_SIZE + 0x00000000, // PA_SU_POINT_MINMAX + 0x00000000, // PA_SU_LINE_CNTL + 0x00000000, // PA_SC_LINE_STIPPLE + 0x00000000, // VGT_OUTPUT_PATH_CNTL + 0x00000000, // VGT_HOS_CNTL + 0x00000000, // VGT_HOS_MAX_TESS_LEVEL + 0x00000000, // VGT_HOS_MIN_TESS_LEVEL + 0x00000000, // VGT_HOS_REUSE_DEPTH + 0x00000000, // VGT_GROUP_PRIM_TYPE + 0x00000000, // VGT_GROUP_FIRST_DECR + 0x00000000, // VGT_GROUP_DECR + 0x00000000, // VGT_GROUP_VECT_0_CNTL + 0x00000000, // VGT_GROUP_VECT_1_CNTL + 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL + 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL + 0x00000000, // VGT_GS_MODE + 0x00000000, // VGT_GS_ONCHIP_CNTL + 0x00000000, // PA_SC_MODE_CNTL_0 + 0x00000000, // PA_SC_MODE_CNTL_1 + 0x00000000, // VGT_ENHANCE + 0x00000100, // VGT_GS_PER_ES + 0x00000080, // VGT_ES_PER_GS + 0x00000002, // VGT_GS_PER_VS + 0x00000000, // VGT_GSVS_RING_OFFSET_1 + 0x00000000, // VGT_GSVS_RING_OFFSET_2 + 
0x00000000, // VGT_GSVS_RING_OFFSET_3 + 0x00000000, // VGT_GS_OUT_PRIM_TYPE + 0x00000000, // IA_ENHANCE +}; +static const unsigned int gfx10_SECT_CONTEXT_def_5[] = { + 0x00000000, // WD_ENHANCE + 0x00000000, // VGT_PRIMITIVEID_EN +}; +static const unsigned int gfx10_SECT_CONTEXT_def_6[] = { + 0x00000000, // VGT_PRIMITIVEID_RESET +}; +static const unsigned int gfx10_SECT_CONTEXT_def_7[] = { + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN + 0x00000000, // VGT_DRAW_PAYLOAD_CNTL + 0x00000000, // HOLE + 0x00000000, // VGT_INSTANCE_STEP_RATE_0 + 0x00000000, // VGT_INSTANCE_STEP_RATE_1 + 0x000000ff, // IA_MULTI_VGT_PARAM + 0x00000000, // VGT_ESGS_RING_ITEMSIZE + 0x00000000, // VGT_GSVS_RING_ITEMSIZE + 0x00000000, // VGT_REUSE_OFF + 0x00000000, // VGT_VTX_CNT_EN + 0x00000000, // DB_HTILE_SURFACE + 0x00000000, // DB_SRESULTS_COMPARE_STATE0 + 0x00000000, // DB_SRESULTS_COMPARE_STATE1 + 0x00000000, // DB_PRELOAD_CONTROL + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE + 0, // HOLE + 0x00000000, // VGT_GS_MAX_VERT_OUT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_TESS_DISTRIBUTION + 0x00000000, // VGT_SHADER_STAGES_EN + 0x00000000, // VGT_LS_HS_CONFIG + 0x00000000, // VGT_GS_VERT_ITEMSIZE + 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 + 0x00000000, // VGT_TF_PARAM + 0x00000000, // DB_ALPHA_TO_MASK + 0x00000000, // VGT_DISPATCH_DRAW_INDEX + 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL + 0x00000000, // PA_SU_POLY_OFFSET_CLAMP + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET + 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET + 0x00000000, // VGT_GS_INSTANCE_CNT + 0x00000000, // VGT_STRMOUT_CONFIG + 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG +}; +static const unsigned int gfx10_SECT_CONTEXT_def_8[] = { + 0x00000000, // PA_SC_CENTROID_PRIORITY_0 + 0x00000000, // PA_SC_CENTROID_PRIORITY_1 + 0x00001000, // PA_SC_LINE_CNTL + 0x00000000, // PA_SC_AA_CONFIG + 0x00000005, // PA_SU_VTX_CNTL + 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ + 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ + 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ + 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 + 
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 + 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0 + 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1 + 0x00000000, // PA_SC_SHADER_CONTROL + 0x00000003, // PA_SC_BINNER_CNTL_0 + 0x00000000, // PA_SC_BINNER_CNTL_1 + 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL + 0x00000000, // PA_SC_NGG_MODE_CNTL + 0, // HOLE + 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL + 0x00000020, // VGT_OUT_DEALLOC_CNTL + 0x00000000, // CB_COLOR0_BASE + 0x00000000, // CB_COLOR0_PITCH + 0x00000000, // CB_COLOR0_SLICE + 0x00000000, // CB_COLOR0_VIEW + 0x00000000, // CB_COLOR0_INFO + 0x00000000, // CB_COLOR0_ATTRIB + 0x00000000, // CB_COLOR0_DCC_CONTROL + 0x00000000, // CB_COLOR0_CMASK + 0x00000000, // CB_COLOR0_CMASK_SLICE + 0x00000000, // CB_COLOR0_FMASK + 0x00000000, // CB_COLOR0_FMASK_SLICE + 0x00000000, // CB_COLOR0_CLEAR_WORD0 + 0x00000000, // CB_COLOR0_CLEAR_WORD1 + 0x00000000, // CB_COLOR0_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR1_BASE + 0x00000000, // CB_COLOR1_PITCH + 0x00000000, // CB_COLOR1_SLICE + 0x00000000, // CB_COLOR1_VIEW + 0x00000000, // CB_COLOR1_INFO + 0x00000000, // CB_COLOR1_ATTRIB + 0x00000000, // CB_COLOR1_DCC_CONTROL + 0x00000000, // CB_COLOR1_CMASK + 0x00000000, // CB_COLOR1_CMASK_SLICE + 0x00000000, // CB_COLOR1_FMASK + 0x00000000, // CB_COLOR1_FMASK_SLICE + 0x00000000, // CB_COLOR1_CLEAR_WORD0 + 0x00000000, // CB_COLOR1_CLEAR_WORD1 + 0x00000000, // CB_COLOR1_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR2_BASE + 0x00000000, // CB_COLOR2_PITCH + 0x00000000, // CB_COLOR2_SLICE + 0x00000000, // CB_COLOR2_VIEW + 0x00000000, // CB_COLOR2_INFO + 0x00000000, // CB_COLOR2_ATTRIB + 0x00000000, // CB_COLOR2_DCC_CONTROL + 0x00000000, // CB_COLOR2_CMASK + 0x00000000, // CB_COLOR2_CMASK_SLICE + 0x00000000, // CB_COLOR2_FMASK + 0x00000000, // CB_COLOR2_FMASK_SLICE + 0x00000000, // CB_COLOR2_CLEAR_WORD0 + 0x00000000, // CB_COLOR2_CLEAR_WORD1 + 0x00000000, // CB_COLOR2_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR3_BASE + 0x00000000, // CB_COLOR3_PITCH + 0x00000000, // CB_COLOR3_SLICE + 0x00000000, // CB_COLOR3_VIEW + 0x00000000, // CB_COLOR3_INFO + 0x00000000, // CB_COLOR3_ATTRIB + 0x00000000, // CB_COLOR3_DCC_CONTROL + 0x00000000, // CB_COLOR3_CMASK + 0x00000000, // CB_COLOR3_CMASK_SLICE + 0x00000000, // CB_COLOR3_FMASK + 0x00000000, // CB_COLOR3_FMASK_SLICE + 0x00000000, // CB_COLOR3_CLEAR_WORD0 + 0x00000000, // CB_COLOR3_CLEAR_WORD1 + 0x00000000, // CB_COLOR3_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR4_BASE + 0x00000000, // CB_COLOR4_PITCH + 0x00000000, // CB_COLOR4_SLICE + 0x00000000, // CB_COLOR4_VIEW + 0x00000000, // CB_COLOR4_INFO + 0x00000000, // CB_COLOR4_ATTRIB + 0x00000000, // CB_COLOR4_DCC_CONTROL + 0x00000000, // CB_COLOR4_CMASK + 0x00000000, // CB_COLOR4_CMASK_SLICE + 0x00000000, // CB_COLOR4_FMASK + 0x00000000, // CB_COLOR4_FMASK_SLICE + 0x00000000, // CB_COLOR4_CLEAR_WORD0 + 0x00000000, // CB_COLOR4_CLEAR_WORD1 + 0x00000000, // CB_COLOR4_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR5_BASE + 0x00000000, // CB_COLOR5_PITCH + 0x00000000, // CB_COLOR5_SLICE + 0x00000000, // CB_COLOR5_VIEW + 0x00000000, // CB_COLOR5_INFO + 0x00000000, // CB_COLOR5_ATTRIB + 0x00000000, // CB_COLOR5_DCC_CONTROL + 0x00000000, // CB_COLOR5_CMASK + 0x00000000, // CB_COLOR5_CMASK_SLICE + 0x00000000, // 
CB_COLOR5_FMASK + 0x00000000, // CB_COLOR5_FMASK_SLICE + 0x00000000, // CB_COLOR5_CLEAR_WORD0 + 0x00000000, // CB_COLOR5_CLEAR_WORD1 + 0x00000000, // CB_COLOR5_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR6_BASE + 0x00000000, // CB_COLOR6_PITCH + 0x00000000, // CB_COLOR6_SLICE + 0x00000000, // CB_COLOR6_VIEW + 0x00000000, // CB_COLOR6_INFO + 0x00000000, // CB_COLOR6_ATTRIB + 0x00000000, // CB_COLOR6_DCC_CONTROL + 0x00000000, // CB_COLOR6_CMASK + 0x00000000, // CB_COLOR6_CMASK_SLICE + 0x00000000, // CB_COLOR6_FMASK + 0x00000000, // CB_COLOR6_FMASK_SLICE + 0x00000000, // CB_COLOR6_CLEAR_WORD0 + 0x00000000, // CB_COLOR6_CLEAR_WORD1 + 0x00000000, // CB_COLOR6_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR7_BASE + 0x00000000, // CB_COLOR7_PITCH + 0x00000000, // CB_COLOR7_SLICE + 0x00000000, // CB_COLOR7_VIEW + 0x00000000, // CB_COLOR7_INFO + 0x00000000, // CB_COLOR7_ATTRIB + 0x00000000, // CB_COLOR7_DCC_CONTROL + 0x00000000, // CB_COLOR7_CMASK + 0x00000000, // CB_COLOR7_CMASK_SLICE + 0x00000000, // CB_COLOR7_FMASK + 0x00000000, // CB_COLOR7_FMASK_SLICE + 0x00000000, // CB_COLOR7_CLEAR_WORD0 + 0x00000000, // CB_COLOR7_CLEAR_WORD1 + 0x00000000, // CB_COLOR7_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR0_BASE_EXT + 0x00000000, // CB_COLOR1_BASE_EXT + 0x00000000, // CB_COLOR2_BASE_EXT + 0x00000000, // CB_COLOR3_BASE_EXT + 0x00000000, // CB_COLOR4_BASE_EXT + 0x00000000, // CB_COLOR5_BASE_EXT + 0x00000000, // CB_COLOR6_BASE_EXT + 0x00000000, // CB_COLOR7_BASE_EXT + 0x00000000, // CB_COLOR0_CMASK_BASE_EXT + 0x00000000, // CB_COLOR1_CMASK_BASE_EXT + 0x00000000, // CB_COLOR2_CMASK_BASE_EXT + 0x00000000, // CB_COLOR3_CMASK_BASE_EXT + 0x00000000, // CB_COLOR4_CMASK_BASE_EXT + 0x00000000, // CB_COLOR5_CMASK_BASE_EXT + 0x00000000, // CB_COLOR6_CMASK_BASE_EXT + 0x00000000, // CB_COLOR7_CMASK_BASE_EXT + 0x00000000, // CB_COLOR0_FMASK_BASE_EXT + 0x00000000, // CB_COLOR1_FMASK_BASE_EXT + 0x00000000, // CB_COLOR2_FMASK_BASE_EXT + 0x00000000, // CB_COLOR3_FMASK_BASE_EXT + 0x00000000, // CB_COLOR4_FMASK_BASE_EXT + 0x00000000, // CB_COLOR5_FMASK_BASE_EXT + 0x00000000, // CB_COLOR6_FMASK_BASE_EXT + 0x00000000, // CB_COLOR7_FMASK_BASE_EXT + 0x00000000, // CB_COLOR0_DCC_BASE_EXT + 0x00000000, // CB_COLOR1_DCC_BASE_EXT + 0x00000000, // CB_COLOR2_DCC_BASE_EXT + 0x00000000, // CB_COLOR3_DCC_BASE_EXT + 0x00000000, // CB_COLOR4_DCC_BASE_EXT + 0x00000000, // CB_COLOR5_DCC_BASE_EXT + 0x00000000, // CB_COLOR6_DCC_BASE_EXT + 0x00000000, // CB_COLOR7_DCC_BASE_EXT + 0x00000000, // CB_COLOR0_ATTRIB2 + 0x00000000, // CB_COLOR1_ATTRIB2 + 0x00000000, // CB_COLOR2_ATTRIB2 + 0x00000000, // CB_COLOR3_ATTRIB2 + 0x00000000, // CB_COLOR4_ATTRIB2 + 0x00000000, // CB_COLOR5_ATTRIB2 + 0x00000000, // CB_COLOR6_ATTRIB2 + 0x00000000, // CB_COLOR7_ATTRIB2 + 0x00000000, // CB_COLOR0_ATTRIB3 + 0x00000000, // CB_COLOR1_ATTRIB3 + 0x00000000, // CB_COLOR2_ATTRIB3 + 0x00000000, // CB_COLOR3_ATTRIB3 + 0x00000000, // CB_COLOR4_ATTRIB3 + 0x00000000, // CB_COLOR5_ATTRIB3 + 0x00000000, // CB_COLOR6_ATTRIB3 + 0x00000000, // CB_COLOR7_ATTRIB3 +}; +static const struct cs_extent_def gfx10_SECT_CONTEXT_defs[] = { + {gfx10_SECT_CONTEXT_def_1, 0x0000a000, 215 }, + {gfx10_SECT_CONTEXT_def_2, 0x0000a0d8, 272 }, + {gfx10_SECT_CONTEXT_def_3, 0x0000a1f5, 4 }, + {gfx10_SECT_CONTEXT_def_4, 0x0000a1ff, 158 }, + {gfx10_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, + {gfx10_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, + {gfx10_SECT_CONTEXT_def_7, 0x0000a2a5, 66 }, + {gfx10_SECT_CONTEXT_def_8, 0x0000a2f5, 203 }, + { 0, 0, 0 } +}; +static const struct cs_section_def gfx10_cs_data[] = { 
+ { gfx10_SECT_CONTEXT_defs, SECT_CONTEXT }, + { 0, SECT_NONE } +}; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 61024b9c7a4b..1dca0cabc326 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 1f0426d2fc2a..40d2ac723dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -20,7 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <drm/drm_fourcc.h> +#include <drm/drm_vblank.h> + #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -233,6 +236,7 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; u32 tmp; /* flip at hsync for async, default is vsync */ @@ -240,6 +244,9 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0); WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout address */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -323,9 +330,11 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -361,6 +370,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -375,9 +385,11 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -390,6 +402,7 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1212,10 +1225,12 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct 
amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1223,12 +1238,14 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1254,10 +1271,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1266,12 +1285,14 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1306,10 +1327,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1332,12 +1355,14 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1345,10 +1370,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 
2280b971d758..898ef72d423c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -20,7 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <drm/drm_fourcc.h> +#include <drm/drm_vblank.h> + #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -251,6 +254,7 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; u32 tmp; /* flip immediate for async, default is vsync */ @@ -258,6 +262,9 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0); WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -341,9 +348,11 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -378,6 +387,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -392,9 +402,11 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -406,6 +418,7 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1238,10 +1251,12 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1249,12 +1264,14 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + 
drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1280,10 +1297,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1292,12 +1311,14 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1332,10 +1353,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1358,12 +1381,14 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1371,10 +1396,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index bea32f076b91..db15a112becc 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -20,7 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include <drm/drmP.h> + +#include <linux/pci.h> + +#include <drm/drm_fourcc.h> +#include <drm/drm_vblank.h> + #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -186,10 +191,14 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -272,9 +281,11 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -300,7 +311,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - + drm_connector_list_iter_end(&iter); } /** @@ -315,9 +326,11 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -329,6 +342,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1115,20 +1129,24 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int interlace = 0; u32 tmp; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1155,21 +1173,25 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder 
*encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u8 *sadb = NULL; int sad_count; u32 tmp; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1212,10 +1234,12 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1235,12 +1259,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1248,10 +1274,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 tmp = 0; @@ -1623,6 +1649,7 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); int bpc = 8; @@ -1630,12 +1657,14 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, if (!dig || !dig->afmt) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 13da915991dd..f06c9022c1fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -20,7 
+20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <drm/drm_fourcc.h> +#include <drm/drm_vblank.h> + #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -181,10 +184,14 @@ static void dce_v8_0_page_flip(struct amdgpu_device *adev, int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -268,9 +275,11 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -296,6 +305,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -310,9 +320,11 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -324,6 +336,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1150,10 +1163,12 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp = 0, offset; @@ -1162,12 +1177,14 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1207,10 
+1224,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 offset, tmp; u8 *sadb = NULL; @@ -1221,12 +1240,14 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1256,11 +1277,13 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; u32 offset; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1285,12 +1308,14 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1298,10 +1323,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index e4cc1d48eaab..e4f94863332c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
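The DCE hunks above all make the same pair of changes: every open-coded walk of dev->mode_config.connector_list becomes a drm_connector_list_iter walk, and the drm_edid_to_sad() error message now fires only for genuinely negative returns while a zero SAD count still bails out quietly. A minimal sketch of the iterator pattern, with a hypothetical find_connector_for_encoder() helper standing in for the inline loops in the patch (not part of the patch itself):

#include <drm/drm_connector.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>

static struct drm_connector *
find_connector_for_encoder(struct drm_encoder *encoder)
{
	struct drm_device *dev = encoder->dev;
	struct drm_connector *connector, *found = NULL;
	struct drm_connector_list_iter iter;

	drm_connector_list_iter_begin(dev, &iter);
	drm_for_each_connector_iter(connector, &iter) {
		if (connector->encoder == encoder) {
			found = connector;
			break;
		}
	}
	/* every _begin() must be paired with an _end(), even after an early break */
	drm_connector_list_iter_end(&iter);

	/* as in the hunks above, the caller uses the result immediately;
	 * no extra reference is kept past the lookup */
	return found;
}

The iterator takes a temporary reference on each connector as it steps through the list, so the loop no longer depends on the connector list staying unchanged underneath it the way the bare list_for_each_entry() walk did.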
* */ -#include <drm/drmP.h> + +#include <drm/drm_vblank.h> + #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -258,15 +260,14 @@ static struct drm_encoder * dce_virtual_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; - int i; - drm_connector_for_each_possible_encoder(connector, encoder, i) { + drm_connector_for_each_possible_encoder(connector, encoder) { if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) return encoder; } /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder, i) + drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; @@ -452,12 +453,8 @@ static int dce_virtual_hw_init(void *handle) #endif /* no DCE */ break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - break; default: - DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); + break; } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 9935371db7ce..d6221298b477 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -29,7 +29,11 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; -static void df_v1_7_init (struct amdgpu_device *adev) +static void df_v1_7_sw_init(struct amdgpu_device *adev) +{ +} + +static void df_v1_7_sw_fini(struct amdgpu_device *adev) { } @@ -110,7 +114,8 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, } const struct amdgpu_df_funcs df_v1_7_funcs = { - .init = df_v1_7_init, + .sw_init = df_v1_7_sw_init, + .sw_fini = df_v1_7_sw_fini, .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, .get_fb_channel_number = df_v1_7_get_fb_channel_number, .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index d5ebe566809b..16fbd2bc8ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -30,8 +30,201 @@ static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; -static void df_v3_6_init(struct amdgpu_device *adev) +/* init df format attrs */ +AMDGPU_PMU_ATTR(event, "config:0-7"); +AMDGPU_PMU_ATTR(instance, "config:8-15"); +AMDGPU_PMU_ATTR(umask, "config:16-23"); + +/* df format attributes */ +static struct attribute *df_v3_6_format_attrs[] = { + &pmu_attr_event.attr, + &pmu_attr_instance.attr, + &pmu_attr_umask.attr, + NULL +}; + +/* df format attribute group */ +static struct attribute_group df_v3_6_format_attr_group = { + .name = "format", + .attrs = df_v3_6_format_attrs, +}; + +/* df event attrs */ +AMDGPU_PMU_ATTR(cake0_pcsout_txdata, + "event=0x7,instance=0x46,umask=0x2"); +AMDGPU_PMU_ATTR(cake1_pcsout_txdata, + "event=0x7,instance=0x47,umask=0x2"); +AMDGPU_PMU_ATTR(cake0_pcsout_txmeta, + "event=0x7,instance=0x46,umask=0x4"); +AMDGPU_PMU_ATTR(cake1_pcsout_txmeta, + "event=0x7,instance=0x47,umask=0x4"); +AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc, + "event=0xb,instance=0x46,umask=0x4"); +AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc, + "event=0xb,instance=0x47,umask=0x4"); +AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc, + "event=0xb,instance=0x46,umask=0x8"); +AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc, + "event=0xb,instance=0x47,umask=0x8"); + +/* df event attributes */ +static struct attribute *df_v3_6_event_attrs[] = { + &pmu_attr_cake0_pcsout_txdata.attr, + &pmu_attr_cake1_pcsout_txdata.attr, + &pmu_attr_cake0_pcsout_txmeta.attr, + 
&pmu_attr_cake1_pcsout_txmeta.attr, + &pmu_attr_cake0_ftiinstat_reqalloc.attr, + &pmu_attr_cake1_ftiinstat_reqalloc.attr, + &pmu_attr_cake0_ftiinstat_rspalloc.attr, + &pmu_attr_cake1_ftiinstat_rspalloc.attr, + NULL +}; + +/* df event attribute group */ +static struct attribute_group df_v3_6_event_attr_group = { + .name = "events", + .attrs = df_v3_6_event_attrs +}; + +/* df event attr groups */ +const struct attribute_group *df_v3_6_attr_groups[] = { + &df_v3_6_format_attr_group, + &df_v3_6_event_attr_group, + NULL +}; + +static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, + uint32_t ficaa_val) +{ + unsigned long flags, address, data; + uint32_t ficadl_val, ficadh_val; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + ficadl_val = RREG32(data); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + ficadh_val = RREG32(data); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); +} + +static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + WREG32(data, ficadl_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + WREG32(data, ficadh_val); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_rreg - read perfmon lo and hi + * + * required to be atomic. no mmio method provided so subsequent reads for lo + * and hi require to preserve df finite state machine + */ +static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t *lo_val, + uint32_t hi_addr, uint32_t *hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + *lo_val = RREG32(data); + WREG32(address, hi_addr); + *hi_val = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_wreg - write to perfmon lo and hi + * + * required to be atomic. no mmio method provided so subsequent reads after + * data writes cannot occur to preserve data fabrics finite state machine. 
+ */ +static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, + uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + WREG32(data, lo_val); + WREG32(address, hi_addr); + WREG32(data, hi_val); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* get the number of df counters available */ +static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev; + struct drm_device *ddev; + int i, count; + + ddev = dev_get_drvdata(dev); + adev = ddev->dev_private; + count = 0; + + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if (adev->df_perfmon_config_assign_mask[i] == 0) + count++; + } + + return snprintf(buf, PAGE_SIZE, "%i\n", count); +} + +/* device attr for available perfmon counters */ +static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); + +/* init perfmons */ +static void df_v3_6_sw_init(struct amdgpu_device *adev) +{ + int i, ret; + + ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail); + if (ret) + DRM_ERROR("failed to create file for available df counters\n"); + + for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) + adev->df_perfmon_config_assign_mask[i] = 0; +} + +static void df_v3_6_sw_fini(struct amdgpu_device *adev) { + + device_remove_file(adev->dev, &dev_attr_df_cntr_avail); + } static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, @@ -105,12 +298,262 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_DF_MGCG; } +/* get assigned df perfmon ctr as int */ +static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + int i; + + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if ((config & 0x0FFFFFFUL) == + adev->df_perfmon_config_assign_mask[i]) + return i; + } + + return -EINVAL; +} + +/* get address based on counter assignment */ +static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, + uint64_t config, + int is_ctrl, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + if (target_cntr < 0) + return; + + switch (target_cntr) { + + case 0: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; + break; + case 1: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; + break; + case 2: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; + break; + case 3: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; + break; + + } + +} + +/* get read counter address */ +static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); +} + +/* get control counter settings i.e. 
address and values to set */ +static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr, + uint32_t *lo_val, + uint32_t *hi_val) +{ + + uint32_t eventsel, instance, unitmask; + uint32_t instance_10, instance_5432, instance_76; + + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); + + if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { + DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x", + *lo_base_addr, *hi_base_addr); + return -ENXIO; + } + + eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; + unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; + instance = DF_V3_6_GET_INSTANCE(config); + + instance_10 = instance & 0x3; + instance_5432 = (instance >> 2) & 0xf; + instance_76 = (instance >> 6) & 0x3; + + *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22); + *hi_val = (instance_76 << 29) | instance_5432; + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val); + + return 0; +} + +/* add df performance counters for read */ +static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + int i, target_cntr; + + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + if (target_cntr >= 0) + return 0; + + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if (adev->df_perfmon_config_assign_mask[i] == 0U) { + adev->df_perfmon_config_assign_mask[i] = + config & 0x0FFFFFFUL; + return 0; + } + } + + return -ENOSPC; +} + +/* release performance counter */ +static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + if (target_cntr >= 0) + adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL; +} + + +static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + uint32_t lo_base_addr, hi_base_addr; + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); +} + +static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, + int is_enable) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + int ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + + df_v3_6_reset_perfmon_cntr(adev, config); + + if (is_enable) { + ret = df_v3_6_pmc_add_cntr(adev, config); + } else { + ret = df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + &lo_val, + &hi_val); + + if (ret) + return ret; + + df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val, + hi_base_addr, hi_val); + } + + break; + default: + break; + } + + return ret; +} + +static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, + int is_disable) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + int ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + ret = df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + &lo_val, + &hi_val); + + if (ret) + return ret; + + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); + + if (is_disable) + df_v3_6_pmc_release_cntr(adev, config); + + break; + default: + break; + } + + return ret; +} + +static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, + uint64_t config, + uint64_t *count) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + *count = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + + 
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, + hi_base_addr, &hi_val); + + *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); + + if (*count >= DF_V3_6_PERFMON_OVERFLOW) + *count = 0; + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, lo_base_addr, hi_base_addr, lo_val, hi_val); + + break; + + default: + break; + } +} + const struct amdgpu_df_funcs df_v3_6_funcs = { - .init = df_v3_6_init, + .sw_init = df_v3_6_sw_init, + .sw_fini = df_v3_6_sw_fini, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, .get_fb_channel_number = df_v3_6_get_fb_channel_number, .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, .update_medium_grain_clock_gating = df_v3_6_update_medium_grain_clock_gating, .get_clockgating_state = df_v3_6_get_clockgating_state, + .pmc_start = df_v3_6_pmc_start, + .pmc_stop = df_v3_6_pmc_stop, + .pmc_get_count = df_v3_6_pmc_get_count, + .get_fica = df_v3_6_get_fica, + .set_fica = df_v3_6_set_fica }; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index e79c58e5efcb..76998541bc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,6 +35,16 @@ enum DF_V3_6_MGCG { DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 }; +/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ +#define DF_V3_6_MAX_COUNTERS 4 + +/* get flags from df perfmon config */ +#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL) +#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL + +extern const struct attribute_group *df_v3_6_attr_groups[]; extern const struct amdgpu_df_funcs df_v3_6_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c new file mode 100644 index 000000000000..ca5f0e7ea1ac --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -0,0 +1,5490 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
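For orientation, the perfmon plumbing added to df_v3_6.c encodes each event exactly as the format attributes describe (event in config[7:0], instance in config[15:8], umask in config[23:16]) and pmc_get_ctrl_settings() then scatters those fields into the PerfMonCtl lo/hi pair. A standalone worked example of that packing, using the cake0_pcsout_txdata event (event=0x7, instance=0x46, umask=0x2); this little program is illustrative only and not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* same field extraction as the DF_V3_6_GET_* macros in df_v3_6.h */
#define DF_V3_6_GET_EVENT(x)    ((x) & 0xFFUL)
#define DF_V3_6_GET_INSTANCE(x) (((x) >> 8) & 0xFFUL)
#define DF_V3_6_GET_UNITMASK(x) (((x) >> 16) & 0xFFUL)

int main(void)
{
	/* cake0_pcsout_txdata: event=0x7, instance=0x46, umask=0x2 */
	uint64_t config = 0x7 | (0x46 << 8) | (0x2 << 16);

	uint32_t eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
	uint32_t unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
	uint32_t instance = DF_V3_6_GET_INSTANCE(config);

	/* the 8-bit instance is split across the two control registers */
	uint32_t instance_10   = instance & 0x3;
	uint32_t instance_5432 = (instance >> 2) & 0xf;
	uint32_t instance_76   = (instance >> 6) & 0x3;

	/* (1 << 22) is the fixed control bit the driver always sets in lo_val */
	uint32_t lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
	uint32_t hi_val = (instance_76 << 29) | instance_5432;

	printf("config=0x%llx lo=0x%08x hi=0x%08x\n",
	       (unsigned long long)config, lo_val, hi_val);
	/* prints: config=0x24607 lo=0x00400287 hi=0x20000001 */
	return 0;
}

On the read side, df_v3_6_pmc_get_count() glues the 32-bit counter halves back into a 64-bit value and clamps anything at or above DF_V3_6_PERFMON_OVERFLOW back to zero.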
+ * + */ + +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "nv.h" +#include "nvd.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "navi10_enum.h" +#include "hdp/hdp_5_0_0_offset.h" +#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" + +#include "soc15.h" +#include "soc15_common.h" +#include "clearstate_gfx10.h" +#include "v10_structs.h" +#include "gfx_v10_0.h" +#include "nbio_v2_3.h" + +/** + * Navi10 has two graphic rings to share each graphic pipe. + * 1. Primary ring + * 2. Async ring + * + * In bring-up phase, it just used primary ring so set gfx ring number as 1 at + * first. + */ +#define GFX10_NUM_GFX_RINGS 2 +#define GFX10_MEC_HPD_SIZE 2048 + +#define F32_CE_PROGRAM_RAM_SIZE 65536 +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L + +#define mmCGTT_GS_NGG_CLK_CTRL 0x5087 +#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 + +MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi10_me.bin"); +MODULE_FIRMWARE("amdgpu/navi10_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi14_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi12_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi12_me.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_10_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 
0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = +{ + /* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), 
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = +{ + /* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = +{ + /* Pending on emulation bring up */ +}; + +#define DEFAULT_SH_MEM_CONFIG \ + ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ + (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) + + +static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev); +static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev); +static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); +static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); +static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance); +static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); + +static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev); +static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev); +static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); +static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); +static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); + +static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx10_kiq_set_resources, + .kiq_map_queues = gfx10_kiq_map_queues, + .kiq_unmap_queues = gfx10_kiq_unmap_queues, + .kiq_query_status = gfx10_kiq_query_status, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, +}; + +static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs; +} + +static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_0_nv10, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10)); + break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_1, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_nv14, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + 
golden_settings_gc_10_1_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2_nv12, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12)); + break; + default: + break; + } +} + +static void gfx_v10_0_scratch_init(struct amdgpu_device *adev) +{ + adev->gfx.scratch.num_reg = 8; + adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); + adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; +} + +static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + return r; + } + + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + if (i < adev->usec_timeout) { + if (amdgpu_emu_mode == 1) + DRM_INFO("ring test on %d succeeded in %d msecs\n", + ring->idx, i); + else + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", + ring->idx, scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; +} + +static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + uint32_t scratch; + uint32_t tmp = 0; + long r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + return r; + } + + WREG32(scratch, 0xCAFEDEAD); + + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ib.ptr[1] = ((scratch - 
PACKET3_SET_UCONFIG_REG_START)); + ib.ptr[2] = 0xDEADBEEF; + ib.length_dw = 3; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err2; + } + + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } +err2: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err1: + amdgpu_gfx_scratch_free(adev, scratch); + + return r; +} + +static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + + kfree(adev->gfx.rlc.register_list_format); +} + +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.cp_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + if ((adev->gfx.me_fw_version >= 0x00000046) && + (adev->gfx.me_feature_version >= 27) && + (adev->gfx.pfp_fw_version >= 0x00000068) && + (adev->gfx.pfp_feature_version >= 27) && + (adev->gfx.mec_fw_version >= 0x0000005b) && + (adev->gfx.mec_feature_version >= 27)) + adev->gfx.cp_fw_write_wait = true; + break; + default: + break; + } + + if (adev->gfx.cp_fw_write_wait == false) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); +} + + +static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + +static void 
gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + break; + default: + break; + } +} + +static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[40]; + char wks[10]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; + + DRM_DEBUG("\n"); + + memset(wks, 0, sizeof(wks)); + switch (adev->asic_type) { + case CHIP_NAVI10: + chip_name = "navi10"; + break; + case CHIP_NAVI14: + chip_name = "navi14"; + if (!(adev->pdev->device == 0x7340 && + adev->pdev->revision != 0x00)) + snprintf(wks, sizeof(wks), "_wks"); + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks); + err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.pfp_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks); + err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.me_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks); + err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.ce_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + 
adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v10_0_init_rlc_ext_microcode(adev); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + if (adev->gfx.rlc.is_rlc_v2_1 && + adev->gfx.rlc.save_restore_list_cntl_size_bytes && + adev->gfx.rlc.save_restore_list_gpm_size_bytes && + 
adev->gfx.rlc.save_restore_list_srm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; + info->fw = adev->gfx.mec_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; + info->fw = adev->gfx.mec_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + if (adev->gfx.mec2_fw) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; + info->fw = adev->gfx.mec2_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + } + } + + gfx_v10_0_check_fw_write_wait(adev); +out: + if (err) { + dev_err(adev->dev, + "gfx10: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + } + + gfx_v10_0_check_gfxoff_flag(adev); + + return err; +} + +static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + + /* set PA_SC_TILE_STEERING_OVERRIDE */ + count += 3; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + int ctx_reg_offset; + + if 
(adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + ctx_reg_offset = + SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + buffer[count++] = cpu_to_le32(ctx_reg_offset); + buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) +{ + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} + +static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) +{ + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = gfx10_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) + return r; + } + + return 0; +} + +static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, + AMDGPU_GEM_DOMAIN_VRAM); + if (!r) + adev->gfx.rlc.clear_state_gpu_addr = + amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return r; +} + +static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.rlc.clear_state_obj) + return; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); + if (likely(r == 0)) { + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } +} + +static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); +} + +static int gfx_v10_0_me_init(struct amdgpu_device *adev) +{ + int r; + + bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + + amdgpu_gfx_graphics_queue_acquire(adev); + + r = gfx_v10_0_init_microcode(adev); + if (r) + DRM_ERROR("Failed to load gfx firmware!\n"); + + return r; +} + +static int gfx_v10_0_mec_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + const __le32 *fw_data = NULL; + unsigned fw_size; + u32 *fw = NULL; + size_t mec_hpd_size; + + const struct 
gfx_firmware_header_v1_0 *mec_hdr = NULL; + + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE; + + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v10_0_mec_fini(adev); + return r; + } + + memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw); + if (r) { + dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); + gfx_v10_0_mec_fini(adev); + return r; + } + + memcpy(fw, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + } + + return 0; +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) +{ + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT)); + return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, + uint32_t thread, uint32_t regno, + uint32_t num, uint32_t *out) +{ + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); +} + +static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +{ + /* in gfx10 the SIMD_ID is specified as part of the INSTANCE + * field when performing a select_se_sh so it should be + * zero here */ + WARN_ON(simd != 0); + + /* type 2 wave data */ + dst[(*no_fields)++] = 2; + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); + dst[(*no_fields)++] = wave_read_ind(adev, 
wave, ixSQ_WAVE_M0); +} + +static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + WARN_ON(simd != 0); + + wave_read_regs( + adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, + dst); +} + +static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t start, uint32_t size, + uint32_t *dst) +{ + wave_read_regs( + adev, wave, thread, + start + SQIND_WAVE_VGPRS_OFFSET, size, dst); +} + +static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 q, u32 vm) + { + nv_grbm_select(adev, me, pipe, q, vm); + } + + +static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v10_0_select_se_sh, + .read_wave_data = &gfx_v10_0_read_wave_data, + .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs, + .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs, + .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q, +}; + +static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + adev->gfx.funcs = &gfx_v10_0_gfx_funcs; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + break; + default: + BUG(); + break; + } + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); +} + +static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, + int me, int pipe, int queue) +{ + int r; + struct amdgpu_ring *ring; + unsigned int irq_type; + + ring = &adev->gfx.gfx_ring[ring_id]; + + ring->me = me; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + + if (!ring_id) + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + return 0; +} + +static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + 
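+	/* From the CP's point of view ME 0 is the graphics micro engine, so the
+	 * compute micro engines (MECs) are addressed as ME 1 and ME 2; that is
+	 * why ring->me is derived as "mec + 1" below. */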
ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX10_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + return 0; +} + +static int gfx_v10_0_sw_init(void *handle) +{ + int i, j, k, r, ring_id = 0; + struct amdgpu_kiq *kiq; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + default: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + } + + /* KIQ event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT, + &adev->gfx.kiq.irq); + if (r) + return r; + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_EOP_INTERRUPT, + &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + gfx_v10_0_scratch_init(adev); + + r = gfx_v10_0_me_init(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + + r = gfx_v10_0_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); + return r; + } + + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v10_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } + } + } + + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, + j)) + continue; + + r = gfx_v10_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } + } + } + + r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; + + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd)); + if (r) + return r; + + /* allocate visible FB for rlc auto-loading fw */ + 
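+	/* The backdoor-autoload path stages the microcode images described by the
+	 * PSP TOC in one buffer and lets the RLC load them itself (see
+	 * gfx_v10_0_rlc_backdoor_autoload_enable()), so that buffer has to be
+	 * created here, before the RLC is brought up. */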
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev); + if (r) + return r; + } + + adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE; + + gfx_v10_0_gpu_early_init(adev); + + return 0; +} + +static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, + &adev->gfx.pfp.pfp_fw_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_ptr); +} + +static void gfx_v10_0_ce_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj, + &adev->gfx.ce.ce_fw_gpu_addr, + (void **)&adev->gfx.ce.ce_fw_ptr); +} + +static void gfx_v10_0_me_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, + &adev->gfx.me.me_fw_gpu_addr, + (void **)&adev->gfx.me.me_fw_ptr); +} + +static int gfx_v10_0_sw_fini(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); + amdgpu_gfx_kiq_fini(adev); + + gfx_v10_0_pfp_fini(adev); + gfx_v10_0_ce_fini(adev); + gfx_v10_0_me_fini(adev); + gfx_v10_0_rlc_fini(adev); + gfx_v10_0_mec_fini(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev); + + gfx_v10_0_free_microcode(adev); + + return 0; +} + + +static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev) +{ + /* TODO */ +} + +static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); + + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); +} + +static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); + data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); + + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); + + return (~data) & mask; +} + +static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) +{ + int i, j; + u32 data; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + data = gfx_v10_0_get_rb_active_bitmap(adev); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); + } + } + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + 
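+	/* active_rbs holds one bit per possible render backend, packed per-SA
+	 * within each SE: for example, assuming 2 SAs per SE and 4 RBs per SA,
+	 * SE1/SA0 would contribute bits [11:8]. Cache the bitmap and its
+	 * popcount in gfx.config for later use. */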
adev->gfx.config.backend_enable_mask = active_rbs; + adev->gfx.config.num_rbs = hweight32(active_rbs); +} + +static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev) +{ + uint32_t num_sc; + uint32_t enabled_rb_per_sh; + uint32_t active_rb_bitmap; + uint32_t num_rb_per_sc; + uint32_t num_packer_per_sc; + uint32_t pa_sc_tile_steering_override; + + /* init num_sc */ + num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se * + adev->gfx.config.num_sc_per_sh; + /* init num_rb_per_sc */ + active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev); + enabled_rb_per_sh = hweight32(active_rb_bitmap); + num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh; + /* init num_packer_per_sc */ + num_packer_per_sc = adev->gfx.config.num_packer_per_sc; + + pa_sc_tile_steering_override = 0; + pa_sc_tile_steering_override |= + (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) & + PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK; + pa_sc_tile_steering_override |= + (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) & + PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK; + pa_sc_tile_steering_override |= + (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) & + PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK; + + return pa_sc_tile_steering_override; +} + +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) + +static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + nv_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); + } +} + +static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } +} + + +static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) +{ + int i, j, k; + int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1; + u32 tmp, wgp_active_bitmap = 0; + u32 gcrd_targets_disable_tcp = 0; + u32 utcl_invreq_disable = 0; + /* + * GCRD_TARGETS_DISABLE field contains + * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0] + * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0] + */ + u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask( + 2 * max_wgp_per_sh + /* TCP */ + max_wgp_per_sh + /* SQC */ + 4); /* GL1C */ + /* + * UTCL1_UTCL0_INVREQ_DISABLE field contains + * for Navi10Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] + * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0] + */ + u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask( + 2 * max_wgp_per_sh + /* TCP */ + 2 * max_wgp_per_sh + /* SQC */ + 4 + /* RMI */ + 1); /* SQG */ + + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) { + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + /* + * Set corresponding TCP bits for the inactive WGPs in + * GCRD_SA_TARGETS_DISABLE + */ + gcrd_targets_disable_tcp = 0; + /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ + utcl_invreq_disable = 0; + + for (k = 0; k < max_wgp_per_sh; k++) { + if (!(wgp_active_bitmap & (1 << k))) { + gcrd_targets_disable_tcp |= 3 << (2 * k); + utcl_invreq_disable |= (3 << (2 * k)) | + (3 << (2 * (max_wgp_per_sh + k))); + } + } + + tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); + /* only override TCP & SQC bits */ + tmp &= 0xffffffff << (4 * max_wgp_per_sh); + tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); + WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); + /* only override TCP bits */ + tmp &= 0xffffffff << (2 * max_wgp_per_sh); + tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); + WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); + } + } + + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } +} + +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) +{ + /* TCCs are global (not instanced). 
*/ + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + + adev->gfx.config.tcc_disabled_mask = + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); +} + +static void gfx_v10_0_constants_init(struct amdgpu_device *adev) +{ + u32 tmp; + int i; + + WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + + gfx_v10_0_tiling_mode_table_init(adev); + + gfx_v10_0_setup_rb(adev); + gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v10_0_get_tcc_info(adev); + adev->gfx.config.pa_sc_tile_steering_override = + gfx_v10_0_init_pa_sc_tile_steering_override(adev); + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + nv_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + if (i != 0) { + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + } + } + nv_grbm_select(adev, 0, 0, 0, 0); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v10_0_init_compute_vmid(adev); + gfx_v10_0_init_gds_vmid(adev); + +} + +static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 
1 : 0); + + WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); +} + +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) +{ + int r; + + if (adev->in_gpu_reset) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (r) + return r; + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, + (void **)&adev->gfx.rlc.cs_ptr); + if (!r) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, + adev->gfx.rlc.cs_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + } + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + if (r) + return r; + } + + /* csib */ + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + + return 0; +} + +static int gfx_v10_0_init_pg(struct amdgpu_device *adev) +{ + int i; + int r; + + r = gfx_v10_0_init_csb(adev); + if (r) + return r; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + + /* TODO: init power gating */ + return 0; +} + +void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); +} + +static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev) +{ + WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, + bool enable) +{ + uint32_t rlc_pg_cntl; + + rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL); + + if (!enable) { + /* RLC_PG_CNTL[23] = 0 (default) + * RLC will wait for handshake acks with SMU + * GFXOFF will be enabled + * RLC_PG_CNTL[23] = 1 + * RLC will not issue any message to SMU + * hence no handshake between SMU & RLC + * GFXOFF will be disabled + */ + rlc_pg_cntl |= 0x800000; + } else + rlc_pg_cntl &= ~0x800000; + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl); +} + +static void gfx_v10_0_rlc_start(struct amdgpu_device *adev) +{ + /* TODO: enable rlc & smu handshake until smu + * and gfxoff feature works as expected */ + if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) + gfx_v10_0_rlc_smu_handshake_cntl(adev, false); + + WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); +} + +static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* enable Save Restore Machine */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); +} + +static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, + RLCG_UCODE_LOADING_START_ADDRESS); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, + le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + + return 0; +} + +static int 
gfx_v10_0_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + if (amdgpu_sriov_vf(adev)) + return 0; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); + if (r) + return r; + + r = gfx_v10_0_init_pg(adev); + if (r) + return r; + + /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); + + } else { + adev->gfx.rlc.funcs->stop(adev); + + /* disable CG */ + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); + + /* disable PG */ + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy rlc firmware loading */ + r = gfx_v10_0_rlc_load_microcode(adev); + if (r) + return r; + } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + /* rlc backdoor autoload firmware */ + r = gfx_v10_0_rlc_backdoor_autoload_enable(adev); + if (r) + return r; + } + + r = gfx_v10_0_init_pg(adev); + if (r) + return r; + + adev->gfx.rlc.funcs->start(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); + if (r) + return r; + } + } + return 0; +} + +static struct { + FIRMWARE_ID id; + unsigned int offset; + unsigned int size; +} rlc_autoload_info[FIRMWARE_ID_MAX]; + +static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) +{ + int ret; + RLC_TABLE_OF_CONTENT *rlc_toc; + + ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.rlc.rlc_toc_bo, + &adev->gfx.rlc.rlc_toc_gpu_addr, + (void **)&adev->gfx.rlc.rlc_toc_buf); + if (ret) { + dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret); + return ret; + } + + /* Copy toc from psp sos fw to rlc toc buffer */ + memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); + + rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; + while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && + (rlc_toc->id < FIRMWARE_ID_MAX)) { + if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) && + (rlc_toc->id <= FIRMWARE_ID_CP_MES)) { + /* Offset needs 4KB alignment */ + rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE); + } + + rlc_autoload_info[rlc_toc->id].id = rlc_toc->id; + rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4; + rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4; + + rlc_toc++; + }; + + return 0; +} + +static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev) +{ + uint32_t total_size = 0; + FIRMWARE_ID id; + int ret; + + ret = gfx_v10_0_parse_rlc_toc(adev); + if (ret) { + dev_err(adev->dev, "failed to parse rlc toc\n"); + return 0; + } + + for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++) + total_size += rlc_autoload_info[id].size; + + /* In case the offset in rlc toc ucode is aligned */ + if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset) + total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset + + rlc_autoload_info[FIRMWARE_ID_MAX-1].size; + + return total_size; +} + +static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev) +{ + int r; + uint32_t total_size; + + total_size = gfx_v10_0_calc_toc_total_size(adev); + + r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); + return r; + } + + return 0; +} + +static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct 
amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo, + &adev->gfx.rlc.rlc_toc_gpu_addr, + (void **)&adev->gfx.rlc.rlc_toc_buf); + amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); +} + +static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, + FIRMWARE_ID id, + const void *fw_data, + uint32_t fw_size) +{ + uint32_t toc_offset; + uint32_t toc_fw_size; + char *ptr = adev->gfx.rlc.rlc_autoload_ptr; + + if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX) + return; + + toc_offset = rlc_autoload_info[id].offset; + toc_fw_size = rlc_autoload_info[id].size; + + if (fw_size == 0) + fw_size = toc_fw_size; + + if (fw_size > toc_fw_size) + fw_size = toc_fw_size; + + memcpy(ptr + toc_offset, fw_data, fw_size); + + if (fw_size < toc_fw_size) + memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); +} + +static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) +{ + void *data; + uint32_t size; + + data = adev->gfx.rlc.rlc_toc_buf; + size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size; + + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_RLC_TOC, + data, size); +} + +static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + + /* pfp ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_PFP, + fw_data, fw_size); + + /* ce ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + fw_data = (const __le32 *)(adev->gfx.ce_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_CE, + fw_data, fw_size); + + /* me ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_ME, + fw_data, fw_size); + + /* rlc ucode */ + rlc_hdr = (const struct rlc_firmware_header_v2_0 *) + adev->gfx.rlc_fw->data; + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_RLC_G_UCODE, + fw_data, fw_size); + + /* mec1 ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec_fw->data; + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - + cp_hdr->jt_size * 4; + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_MEC, + fw_data, fw_size); + /* mec2 ucode is not necessary if mec2 ucode is same as mec1 */ +} + +/* Temporarily put sdma part here */ +static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) +{ + const __le32 
*fw_data; + uint32_t fw_size; + const struct sdma_firmware_header_v1_0 *sdma_hdr; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma_hdr = (const struct sdma_firmware_header_v1_0 *) + adev->sdma.instance[i].fw->data; + fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + + le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes); + + if (i == 0) { + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA0_JT, + (uint32_t *)fw_data + + sdma_hdr->jt_offset, + sdma_hdr->jt_size * 4); + } else if (i == 1) { + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA1_JT, + (uint32_t *)fw_data + + sdma_hdr->jt_offset, + sdma_hdr->jt_size * 4); + } + } +} + +static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) +{ + uint32_t rlc_g_offset, rlc_g_size, tmp; + uint64_t gpu_addr; + + gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev); + gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev); + gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev); + + rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset; + rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size; + gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; + + WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr)); + WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr)); + WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size); + + tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR); + if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK | + RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) { + DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n"); + return -EINVAL; + } + + tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); + if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) { + DRM_ERROR("RLC ROM should halt itself\n"); + return -EINVAL; + } + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program me ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_ME].offset; + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, 
INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program ce ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_CE].offset; + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program pfp ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset; + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program mec1 ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset; + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) +{ + uint32_t cp_status; + uint32_t bootload_status; + int i, r; + + for (i = 0; i < adev->usec_timeout; i++) { + cp_status = RREG32_SOC15(GC, 0, mmCP_STAT); + bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS); + if ((cp_status == 0) && + (REG_GET_FIELD(bootload_status, + RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); + return -ETIMEDOUT; + } + + if 
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev); + if (r) + return r; + } + + return 0; +} + +static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); + + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); + if (!enable) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].sched.ready = false; + } + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v1_0 *pfp_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + pfp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.pfp.pfp_fw_obj, + &adev->gfx.pfp.pfp_fw_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); + gfx_v10_0_pfp_fini(adev); + return r; + } + + memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); + amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio.funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, + adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, + upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v1_0 *ce_hdr; + const 
__le32 *fw_data; + unsigned i, fw_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + ce_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + + amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + + fw_data = (const __le32 *)(adev->gfx.ce_fw->data + + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.ce.ce_fw_obj, + &adev->gfx.ce.ce_fw_gpu_addr, + (void **)&adev->gfx.ce.ce_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r); + gfx_v10_0_ce_fini(adev); + return r; + } + + memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj); + amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio.funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, + adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, + upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr)); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v1_0 *me_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + me_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.me.me_fw_obj, + &adev->gfx.me.me_fw_gpu_addr, + (void **)&adev->gfx.me.me_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); + gfx_v10_0_me_fini(adev); + return r; + } + + memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); + amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, 
"failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio.funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, + adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, + upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) + return -EINVAL; + + gfx_v10_0_cp_gfx_enable(adev, false); + + r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); + return r; + } + + r = gfx_v10_0_cp_gfx_load_ce_microcode(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load ce fw\n", r); + return r; + } + + r = gfx_v10_0_cp_gfx_load_me_microcode(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load me fw\n", r); + return r; + } + + return 0; +} + +static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + int r, i; + int ctx_reg_offset; + + /* init the CP */ + WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, + adev->gfx.config.max_hw_contexts - 1); + WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); + + gfx_v10_0_cp_gfx_enable(adev, true); + + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, 0x80000000); + amdgpu_ring_write(ring, 0x80000000); + + for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + amdgpu_ring_write(ring, + PACKET3(PACKET3_SET_CONTEXT_REG, + ext->reg_count)); + amdgpu_ring_write(ring, ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + amdgpu_ring_write(ring, ext->extent[i]); + } + } + } + + ctx_reg_offset = + SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + amdgpu_ring_write(ring, ctx_reg_offset); + amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); + amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); + amdgpu_ring_write(ring, 0x8000); + amdgpu_ring_write(ring, 0x8000); + + amdgpu_ring_commit(ring); + + /* submit cs packet to copy state 0 to next available state */ + ring = &adev->gfx.gfx_ring[1]; + r = amdgpu_ring_alloc(ring, 2); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + 
amdgpu_ring_write(ring, 0); + + amdgpu_ring_commit(ring); + + return 0; +} + +static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, + CP_PIPE_ID pipe) +{ + u32 tmp; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); + + WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp); +} + +static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + u32 tmp; + + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, ring->doorbell_index); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} + +static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 tmp; + u32 rb_bufsz; + u64 rb_addr, rptr_addr, wptr_gpu_addr; + u32 i; + + /* Set the write pointer delay */ + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); + + /* set the RB to use vmid 0 */ + WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); + + /* Init gfx ring 0 for pipe 0 */ + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); + mutex_unlock(&adev->srbm_mutex); + /* Set ring buffer size */ + ring = &adev->gfx.gfx_ring[0]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); +#endif + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); + + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + + /* set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); + + WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); + + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + /* Init gfx ring 1 for pipe 1 */ + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); + mutex_unlock(&adev->srbm_mutex); + ring = &adev->gfx.gfx_ring[1]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, 
mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); + /* Set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); + + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + /* Switch to pipe 0 */ + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); + mutex_unlock(&adev->srbm_mutex); + + /* start the ring */ + gfx_v10_0_cp_gfx_start(adev); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->sched.ready = true; + } + + return 0; +} + +static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + + if (enable) { + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + } else { + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].sched.ready = false; + adev->gfx.kiq.ring.sched.ready = false; + } + udelay(50); +} + +static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *mec_hdr; + const __le32 *fw_data; + unsigned i; + u32 tmp; + u32 usec_timeout = 50000; /* Wait for 50 ms */ + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v10_0_cp_compute_enable(adev, false); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio.funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); + + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & + 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + + /* MEC1 */ + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0); + + for (i = 0; i < mec_hdr->jt_size; i++) + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, + le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); + + 
WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); + + /* + * TODO: Loading MEC2 firmware is only necessary if MEC2 should run + * different microcode than MEC1. + */ + + return 0; +} + +static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); +} + +static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_gfx_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr; + uint32_t tmp; + uint32_t rb_bufsz; + + /* set up gfx hqd wptr */ + mqd->cp_gfx_hqd_wptr = 0; + mqd->cp_gfx_hqd_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set up mqd control */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); + mqd->cp_gfx_mqd_control = tmp; + + /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); + mqd->cp_gfx_hqd_vmid = 0; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); + mqd->cp_gfx_hqd_queue_priority = tmp; + + /* set up time quantum */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); + mqd->cp_gfx_hqd_quantum = tmp; + + /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_gfx_hqd_base = hqd_gpu_addr; + mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; + mqd->cp_gfx_hqd_rptr_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up rb_wptr_poll addr */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ + rb_bufsz = order_base_2(ring->ring_size / 4) - 1; + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); +#endif + mqd->cp_gfx_hqd_cntl = tmp; + + /* set up cp_doorbell_control */ + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + mqd->cp_rb_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); + + /* active the queue */ + mqd->cp_gfx_hqd_active = 1; + + return 0; +} + +#ifdef BRING_UP_DEBUG +static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_gfx_mqd *mqd = ring->mqd_ptr; + + /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); + + /* set GFX_MQD_BASE */ + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); + + /* set GFX_MQD_CONTROL */ + WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); + + /* set GFX_HQD_VMID to 0 */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); + + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY, + mqd->cp_gfx_hqd_queue_priority); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); + + /* set GFX_HQD_BASE, similar as CP_RB_BASE */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); + + /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); + + /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); + + /* set RB_WPTR_POLL_ADDR */ + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); + + /* set RB_DOORBELL_CONTROL */ + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); + + /* active the queue */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); + + return 0; +} +#endif + +static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) +{ + 
struct amdgpu_device *adev = ring->adev; + struct v10_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; + + if (!adev->in_gpu_reset && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_gfx_mqd_init(ring); +#ifdef BRING_UP_DEBUG + gfx_v10_0_gfx_queue_init_register(ring); +#endif + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else if (adev->in_gpu_reset) { + /* reset mqd with the backup copy */ + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + /* reset the ring */ + ring->wptr = 0; + adev->wb.wb[ring->wptr_offs] = 0; + amdgpu_ring_clear_ring(ring); +#ifdef BRING_UP_DEBUG + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_gfx_queue_init_register(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +#endif + } else { + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +#ifndef BRING_UP_DEBUG +static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + int r, i; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues) + return -EINVAL; + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_gfx_rings); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("kfq enable failed\n"); + kiq_ring->sched.ready = false; + } + return r; +} +#endif + +static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_gfx_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } +#ifndef BRING_UP_DEBUG + r = gfx_v10_0_kiq_enable_kgq(adev); + if (r) + goto done; +#endif + r = gfx_v10_0_cp_gfx_start(adev); + if (r) + goto done; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->sched.ready = true; + } +done: + return r; +} + +static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + eop_base_addr = ring->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); + tmp = 
REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* 
set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + + return 0; +} + +static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + int j; + + /* disable wptr polling */ + WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + mqd->cp_hqd_dequeue_request); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + mqd->cp_hqd_pq_rptr); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell_index.userqueue_end * 2) << 2); + } + + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + 
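[Editor's illustration, not part of the patch.] The KIQ/HQD bring-up code just shown repeats two idioms used throughout gfx_v10_0.c: a read-modify-write of a named register field (RREG32_SOC15 / REG_SET_FIELD / WREG32_SOC15) and a bounded poll, with udelay() between reads, until a status bit reports completion. The stand-alone C sketch below shows only the shape of that pattern; the mock_* registers and the simplified mask/shift macros are illustrative assumptions, not the kernel's actual helpers or register layouts.

/* Illustrative sketch only: mock registers and simplified macros,
 * not the driver's REG_SET_FIELD/WREG32_SOC15 implementations. */
#include <stdint.h>
#include <stdio.h>

#define FIELD_SHIFT 4
#define FIELD_MASK  (0xfu << FIELD_SHIFT)
#define STATUS_DONE (1u << 0)

static uint32_t mock_ctrl_reg;    /* stands in for an MMIO control register */
static uint32_t mock_status_reg;  /* stands in for an MMIO status register  */

static uint32_t mock_rreg(volatile uint32_t *reg) { return *reg; }
static void mock_wreg(volatile uint32_t *reg, uint32_t val) { *reg = val; }

/* read-modify-write one named field, leaving the other bits untouched */
static void set_field(volatile uint32_t *reg, uint32_t mask,
                      unsigned int shift, uint32_t val)
{
        uint32_t tmp = mock_rreg(reg);

        tmp &= ~mask;
        tmp |= (val << shift) & mask;
        mock_wreg(reg, tmp);
}

/* bounded poll until a status bit is set; returns 0 on success */
static int wait_for_done(volatile uint32_t *reg, unsigned int usec_timeout)
{
        unsigned int i;

        for (i = 0; i < usec_timeout; i++) {
                if (mock_rreg(reg) & STATUS_DONE)
                        return 0;
                /* a real driver would udelay(1) here between reads */
        }
        return -1;
}

int main(void)
{
        set_field(&mock_ctrl_reg, FIELD_MASK, FIELD_SHIFT, 0x3);
        mock_status_reg |= STATUS_DONE; /* pretend the hardware finished */

        printf("ctrl=0x%08x done=%d\n", (unsigned int)mock_ctrl_reg,
               wait_for_done(&mock_status_reg, 50000) == 0);
        return 0;
}

Compiled with any C compiler and run, the sketch prints the updated control value and a done flag of 1; in the driver the same poll instead times out and returns an error code such as -EINVAL or -ETIMEDOUT, as in the invalidation and bootload waits above.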
+static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; + + gfx_v10_0_kiq_setting(ring); + + if (adev->in_gpu_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_kiq_init_register(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_compute_mqd_init(ring); + gfx_v10_0_kiq_init_register(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } + + return 0; +} + +static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + + if (!adev->in_gpu_reset && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_compute_mqd_init(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + } else { + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->gfx.kiq.ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (unlikely(r != 0)) + return r; + + gfx_v10_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + ring->sched.ready = true; + return 0; +} + +static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v10_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_kcq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kcq(adev); +done: + return r; +} + +static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + if (!(adev->flags & AMD_IS_APU)) + gfx_v10_0_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy firmware loading */ + r = gfx_v10_0_cp_gfx_load_microcode(adev); + if 
(r) + return r; + + r = gfx_v10_0_cp_compute_load_microcode(adev); + if (r) + return r; + } + + r = gfx_v10_0_kiq_resume(adev); + if (r) + return r; + + r = gfx_v10_0_kcq_resume(adev); + if (r) + return r; + + if (!amdgpu_async_gfx_ring) { + r = gfx_v10_0_cp_gfx_resume(adev); + if (r) + return r; + } else { + r = gfx_v10_0_cp_async_gfx_ring_resume(adev); + if (r) + return r; + } + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + DRM_INFO("gfx %d ring me %d pipe %d q %d\n", + i, ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + return r; + } + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + ring->sched.ready = true; + DRM_INFO("compute ring %d mec %d pipe %d q %d\n", + i, ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_test_ring(ring); + if (r) + ring->sched.ready = false; + } + + return 0; +} + +static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable) +{ + gfx_v10_0_cp_gfx_enable(adev, enable); + gfx_v10_0_cp_compute_enable(adev, enable); +} + +static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) +{ + uint32_t data, pattern = 0xDEADBEEF; + + /* check if mmVGT_ESGS_RING_SIZE_UMD + * has been remapped to mmVGT_ESGS_RING_SIZE */ + data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); + + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); + + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); + + if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); + return true; + } else { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); + return false; + } +} + +static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) +{ + uint32_t data; + + /* initialize cam_index to 0 + * index will auto-inc after each data writting */ + WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); + + /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) << + 
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ + data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); +} + +static int gfx_v10_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gfx_v10_0_csb_vram_pin(adev); + if (r) + return r; + + if (!amdgpu_emu_mode) + gfx_v10_0_init_golden_registers(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /** + * For gfx 10, rlc firmware loading relies on smu firmware is + * loaded firstly, so in direct type, it has to load smc ucode + * here before rlc. + */ + r = smu_load_microcode(&adev->smu); + if (r) + return r; + + r = smu_check_fw_status(&adev->smu); + if (r) { + pr_err("SMC firmware status is not correct\n"); + return r; + } + } + + /* if GRBM CAM not remapped, set up the remapping */ + if (!gfx_v10_0_check_grbm_cam_remapping(adev)) + gfx_v10_0_setup_grbm_cam_remapping(adev); + + gfx_v10_0_constants_init(adev); + + r = gfx_v10_0_rlc_resume(adev); + if (r) + return r; + + /* + * init golden registers and rlc resume may override some registers, + * reconfig them here + */ + gfx_v10_0_tcp_harvest(adev); + + r = gfx_v10_0_cp_resume(adev); + if (r) + return r; + + return r; +} + +#ifndef BRING_UP_DEBUG +static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_gfx_rings)) + return -ENOMEM; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], + PREEMPT_QUEUES, 0, 0); + + return amdgpu_ring_test_ring(kiq_ring); +} +#endif + +static int gfx_v10_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); +#ifndef BRING_UP_DEBUG + if (amdgpu_async_gfx_ring) { + r = gfx_v10_0_kiq_disable_kgq(adev); + if (r) + DRM_ERROR("KGQ disable failed\n"); + } +#endif + if (amdgpu_gfx_disable_kcq(adev)) + DRM_ERROR("KCQ disable failed\n"); + if (amdgpu_sriov_vf(adev)) { + pr_debug("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + gfx_v10_0_cp_enable(adev, false); + gfx_v10_0_enable_gui_idle_interrupt(adev, false); + gfx_v10_0_csb_vram_unpin(adev); + + return 0; +} + +static int gfx_v10_0_suspend(void *handle) +{ + return gfx_v10_0_hw_fini(handle); +} + +static int gfx_v10_0_resume(void *handle) +{ + return gfx_v10_0_hw_init(handle); +} + +static bool gfx_v10_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), + GRBM_STATUS, GUI_ACTIVE)) 
+ return false; + else + return true; +} + +static int gfx_v10_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & + GRBM_STATUS__GUI_ACTIVE_MASK; + + if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int gfx_v10_0_soft_reset(void *handle) +{ + u32 grbm_soft_reset = 0; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* GRBM_STATUS */ + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); + if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | + GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | + GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK | + GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK | + GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK + | GRBM_STATUS__BCI_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, + 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_GFX, + 1); + } + + if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, + 1); + } + + /* GRBM_STATUS2 */ + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_RLC, + 1); + + if (grbm_soft_reset) { + /* stop the rlc */ + gfx_v10_0_rlc_stop(adev); + + /* Disable GFX parsing/prefetching */ + gfx_v10_0_cp_gfx_enable(adev, false); + + /* Disable MEC parsing/prefetching */ + gfx_v10_0_cp_compute_enable(adev, false); + + if (grbm_soft_reset) { + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + } + + /* Wait a little for things to settle down */ + udelay(50); + } + return 0; +} + +static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock; + + mutex_lock(&adev->gfx.gpu_clock_mutex); + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + return clock; +} + +static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring, + uint32_t vmid, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) +{ + struct amdgpu_device *adev = ring->adev; + + /* GDS Base */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, + gds_base); + + /* GDS Size */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, + gds_size); + + /* GWS */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, + gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); + + /* OA */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, + (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + 
+static int gfx_v10_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + + gfx_v10_0_set_kiq_pm4_funcs(adev); + gfx_v10_0_set_ring_funcs(adev); + gfx_v10_0_set_irq_funcs(adev); + gfx_v10_0_set_gds_init(adev); + gfx_v10_0_set_rlc_funcs(adev); + + return 0; +} + +static int gfx_v10_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) +{ + uint32_t rlc_cntl; + + /* if RLC is not enabled, do nothing */ + rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL); + return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; +} + +static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + unsigned i; + + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } +} + +static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RLC_SAFE_MODE__CMD_MASK; + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); +} + +static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + /* It is disabled by HW by default */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + /* only for Vega10 & Raven1 */ + data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + /* MGLS is a global flag to control all MGLS in GFX */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + /* 2 - RLC memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); + } + /* 3 - CP memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); + } + } + } else { + /* 1 - MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + /* 2 - disable MGLS in RLC */ + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { + data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); + } + + /* 3 - disable MGLS in CP */ + data = 
RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); + } + } +} + +static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + /* Enable 3D CGCG/CGLS */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + /* write cmd to clear cgcg/cgls ov */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + /* enable 3Dcgcg FSM(0x0000363f) */ + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); + } else { + /* Disable CGCG/CGLS */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + /* disable cgcg, cgls should be disabled */ + data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); + } +} + +static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + /* enable cgcg FSM(0x0000363F) */ + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); + } else { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); + } +} + +static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) 
+{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + if (enable) { + /* CGCG/CGLS should be enabled after MGCG/MGLS + * === MGCG + MGLS === + */ + gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v10_0_update_3d_clock_gating(adev, enable); + /* === CGCG + CGLS === */ + gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS + * === CGCG + CGLS === + */ + gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v10_0_update_3d_clock_gating(adev, enable); + /* === MGCG + MGLS === */ + gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + } + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS)) + gfx_v10_0_enable_gui_idle_interrupt(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); + + return 0; +} + +static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { + .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, + .set_safe_mode = gfx_v10_0_set_safe_mode, + .unset_safe_mode = gfx_v10_0_unset_safe_mode, + .init = gfx_v10_0_rlc_init, + .get_csb_size = gfx_v10_0_get_csb_size, + .get_csb_buffer = gfx_v10_0_get_csb_buffer, + .resume = gfx_v10_0_rlc_resume, + .stop = gfx_v10_0_rlc_stop, + .reset = gfx_v10_0_rlc_reset, + .start = gfx_v10_0_rlc_start +}; + +static int gfx_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + if (!enable) { + amdgpu_gfx_off_ctrl(adev, false); + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + } else + amdgpu_gfx_off_ctrl(adev, true); + break; + default: + break; + } + return 0; +} + +static int gfx_v10_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + gfx_v10_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + break; + default: + break; + } + return 0; +} + +static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_RLC_LS */ + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_CP_LS */ + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; +} + +static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/ +} + +static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); + } else { + wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); + wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; + } + + return wptr; +} + +static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + } +} + +static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */ +} + +static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx10 now */ + } +} + +static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask, reg_mem_engine; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch 
(ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 header, control = 0; + + if (ib->flags & AMDGPU_IB_FLAG_CE) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | (vmid << 24); + + if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + control |= INDIRECT_BUFFER_PRE_ENB(1); + + if (flags & AMDGPU_IB_PREEMPTED) + control |= INDIRECT_BUFFER_PRE_RESUME(1); + + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v10_0_ring_emit_de_meta(ring, + flags & AMDGPU_IB_PREEMPTED ? true : false); + } + + amdgpu_ring_write(ring, header); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + + /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ + if (adev->pdev->device == 0x50) + int_sel = false; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | + PACKET3_RELEASE_MEM_GCR_GL2_WB | + PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ + PACKET3_RELEASE_MEM_GCR_GLM_WB | + PACKET3_RELEASE_MEM_CACHE_POLICY(3) | + PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + PACKET3_RELEASE_MEM_EVENT_INDEX(5))); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | + PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), + upper_32_bits(addr), seq, 0xffffffff, 4); +} + +static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* compute doesn't have PFP */ + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); + } +} + +static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + struct amdgpu_device *adev = ring->adev; + + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + if (amdgpu_mcbp) + gfx_v10_0_ring_emit_ce_meta(ring, + flags & AMDGPU_IB_PREEMPTED ? 
true : false); + + gfx_v10_0_ring_emit_tmz(ring, true); + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + dw2 |= 0x10002; + + /* set load_ce_ram if preamble presented */ + if (AMDGPU_PREAMBLE_IB_PRESENT & flags) + dw2 |= 0x10000000; + } else { + /* still load_ce_ram if this is the first time preamble presented + * although there is no context switch happens. + */ + if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) + dw2 |= 0x10000000; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + +static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + + return ret; +} + +static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +{ + unsigned cur; + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (likely(cur > offset)) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; +} + +static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) + return -ENOMEM; + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* assert IB preemption, emit the trailing fence */ + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, + ring->trail_fence_gpu_addr, + ++ring->trail_seq); + amdgpu_ring_commit(kiq_ring); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); + } + + /* deassert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_ce_ib_state ce_payload = {0}; + uint64_t csa_addr; + int cnt; + + cnt = (sizeof(ce_payload) >> 2) + 4 - 2; + csa_addr = amdgpu_csa_vaddr(ring->adev); + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, ce_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, ce_payload))); + + if (resume) + amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + + offsetof(struct 
v10_gfx_meta_data, + ce_payload), + sizeof(ce_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, + sizeof(ce_payload) >> 2); +} + +static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_de_ib_state de_payload = {0}; + uint64_t csa_addr, gds_addr; + int cnt; + + csa_addr = amdgpu_csa_vaddr(ring->adev); + gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + + cnt = (sizeof(de_payload) >> 2) + 4 - 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, de_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, de_payload))); + + if (resume) + amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + + offsetof(struct v10_gfx_meta_data, + de_payload), + sizeof(de_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&de_payload, + sizeof(de_payload) >> 2); +} + +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ +} + +static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); +} + +static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = false; + + fw_version_ok = adev->gfx.cp_fw_write_wait; + + if (fw_version_ok) + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + +static void +gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, + uint32_t me, uint32_t pipe, + enum amdgpu_interrupt_state state) +{ + uint32_t cp_int_cntl, cp_int_cntl_reg; + + if (!me) { + switch (pipe) { + case 0: + 
cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); + break; + case 1: + cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 0); + WREG32(cp_int_cntl_reg, cp_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 1); + WREG32(cp_int_cntl_reg, cp_int_cntl); + break; + default: + break; + } +} + +static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: + gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); + break; + case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: + gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state); + break; + default: + break; + } + return 0; +} + +static int 
gfx_v10_0_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + + DRM_DEBUG("IH: CP EOP\n"); + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + if (pipe_id == 0) + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + else + amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead of per queue. + */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + return 0; +} + +static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + default: + break; + } + + return 0; +} + +static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we only enabled 1 gfx queue per pipe for now */ + if (ring->me == me_id && ring->pipe == pipe_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + } +} + +static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + +static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + +static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + uint32_t tmp, target; + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + + if (ring->me == 1) + target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + else + target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); + target += ring->pipe; + + switch (type) { + case AMDGPU_CP_KIQ_IRQ_DRIVER0: + if (state == 
AMDGPU_IRQ_STATE_DISABLE) { + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32(target, tmp); + } else { + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32(target, tmp); + } + break; + default: + BUG(); /* kiq only support GENERIC2_INT now */ + break; + } + return 0; +} + +static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", + me_id, pipe_id, queue_id); + + amdgpu_fence_process(ring); + return 0; +} + +static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { + .name = "gfx_v10_0", + .early_init = gfx_v10_0_early_init, + .late_init = gfx_v10_0_late_init, + .sw_init = gfx_v10_0_sw_init, + .sw_fini = gfx_v10_0_sw_fini, + .hw_init = gfx_v10_0_hw_init, + .hw_fini = gfx_v10_0_hw_fini, + .suspend = gfx_v10_0_suspend, + .resume = gfx_v10_0_resume, + .is_idle = gfx_v10_0_is_idle, + .wait_for_idle = gfx_v10_0_wait_for_idle, + .soft_reset = gfx_v10_0_soft_reset, + .set_clockgating_state = gfx_v10_0_set_clockgating_state, + .set_powergating_state = gfx_v10_0_set_powergating_state, + .get_clockgating_state = gfx_v10_0_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = gfx_v10_0_ring_get_rptr_gfx, + .get_wptr = gfx_v10_0_ring_get_wptr_gfx, + .set_wptr = gfx_v10_0_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + * the first COND_EXEC jump to the place + * just prior to this double SWITCH_BUFFER + */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2, /* SWITCH_BUFFER */ + .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v10_0_ring_emit_ib_gfx, + .emit_fence = gfx_v10_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, + .test_ring = gfx_v10_0_ring_test_ring, + .test_ib = gfx_v10_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v10_0_ring_emit_sb, + .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, + .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, + .preempt_ib = gfx_v10_0_ring_preempt_ib, + .emit_tmz = 
gfx_v10_0_ring_emit_tmz, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, +}; + +static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = gfx_v10_0_ring_get_rptr_compute, + .get_wptr = gfx_v10_0_ring_get_wptr_compute, + .set_wptr = gfx_v10_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v10_0_ring_emit_gds_switch */ + 7 + /* gfx_v10_0_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v10_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ + .emit_ib = gfx_v10_0_ring_emit_ib_compute, + .emit_fence = gfx_v10_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, + .test_ring = gfx_v10_0_ring_test_ring, + .test_ib = gfx_v10_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, +}; + +static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = gfx_v10_0_ring_get_rptr_compute, + .get_wptr = gfx_v10_0_ring_get_wptr_compute, + .set_wptr = gfx_v10_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v10_0_ring_emit_gds_switch */ + 7 + /* gfx_v10_0_ring_emit_hdp_flush */ + 5 + /*hdp invalidate */ + 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v10_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ + .emit_ib = gfx_v10_0_ring_emit_ib_compute, + .emit_fence = gfx_v10_0_ring_emit_fence_kiq, + .test_ring = gfx_v10_0_ring_test_ring, + .test_ib = gfx_v10_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v10_0_ring_emit_rreg, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, +}; + +static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute; +} + +static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = { + .set = gfx_v10_0_set_eop_interrupt_state, + .process = gfx_v10_0_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { + .set = gfx_v10_0_set_priv_reg_fault_state, + .process = gfx_v10_0_priv_reg_irq, +}; + +static 
const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { + .set = gfx_v10_0_set_priv_inst_fault_state, + .process = gfx_v10_0_priv_inst_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = { + .set = gfx_v10_0_kiq_set_interrupt_state, + .process = gfx_v10_0_kiq_irq, +}; + +static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs; + + adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; + adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; +} + +static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; + break; + default: + break; + } +} + +static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) +{ + unsigned total_cu = adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines; + + adev->gds.gds_size = 0x10000; + adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; +} + +static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + + WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); +} + +static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) +{ + u32 data, wgp_bitmask; + data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + + wgp_bitmask = + amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + + return (~data) & wgp_bitmask; +} + +static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) +{ + u32 wgp_idx, wgp_active_bitmap; + u32 cu_bitmap_per_wgp, cu_active_bitmap; + + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + cu_active_bitmap = 0; + + for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { + /* if there is one WGP enabled, it means 2 CUs will be enabled */ + cu_bitmap_per_wgp = 3 << (2 * wgp_idx); + if (wgp_active_bitmap & (1 << wgp_idx)) + cu_active_bitmap |= cu_bitmap_per_wgp; + } + + return cu_active_bitmap; +} + +static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + unsigned disable_masks[4 * 2]; + + if (!adev || !cu_info) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + if (i < 4 && j < 2) + gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( + adev, disable_masks[i * 2 + j]); + bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); + cu_info->bitmap[i][j] = bitmap; + + for (k = 0; k < 
adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; + } + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; + } + } + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &gfx_v10_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vi_dpm.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h index c43e03fddfba..b442e50324d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2019 dvanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,12 +21,9 @@ * */ -#ifndef __VI_DPM_H__ -#define __VI_DPM_H__ +#ifndef __GFX_V10_0_H__ +#define __GFX_V10_0_H__ -extern const struct amd_ip_funcs cz_dpm_ip_funcs; -int cz_smu_init(struct amdgpu_device *adev); -int cz_smu_start(struct amdgpu_device *adev); -int cz_smu_fini(struct amdgpu_device *adev); +extern const struct amdgpu_ip_block_version gfx_v10_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 305276c7e4bf..7f0a63628c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -21,6 +21,8 @@ * */ #include <linux/firmware.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "amdgpu_gfx.h" @@ -782,6 +784,25 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | @@ -1793,7 +1814,7 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -3022,7 +3043,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { 
DRM_INFO("Not implemented\n"); } @@ -3094,7 +3115,7 @@ static int gfx_v6_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -3329,7 +3350,7 @@ static int gfx_v6_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v6_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a59e0fdf5a97..791ba398f007 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -20,8 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "amdgpu_gfx.h" @@ -1877,6 +1879,33 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[i].gws, 0); + WREG32(amdgpu_gds_reg_offset[i].oa, 0); + } +} + +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } } static void gfx_v7_0_config_init(struct amdgpu_device *adev) @@ -1957,6 +1986,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_gds_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); @@ -2080,7 +2110,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) r = -ETIMEDOUT; @@ -4167,9 +4197,9 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - cik_srbm_select(adev, me, pipe, q, 0); + cik_srbm_select(adev, me, pipe, q, vm); } static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { @@ -4460,7 +4490,7 @@ static int gfx_v7_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -4493,12 +4523,8 @@ static int gfx_v7_0_sw_init(void *handle) static int gfx_v7_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -4801,7 +4827,7 @@ static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v7_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: @@ -5070,30 +5096,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b8e50a34bdb3..ffbde9136372 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -20,9 +20,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ + +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "vi.h" @@ -855,7 +859,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -1317,6 +1321,39 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, + AMDGPU_GEM_DOMAIN_VRAM); + if (!r) + adev->gfx.rlc.clear_state_gpu_addr = + amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return r; +} + +static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.rlc.clear_state_obj) + return; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); + if (likely(r == 0)) { + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } +} + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -2005,7 +2042,7 @@ static int gfx_v8_0_sw_init(void *handle) } r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - AMDGPU_CP_IRQ_GFX_EOP); + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -2042,7 +2079,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; /* create MQD for all compute queues as well as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); if (r) return r; @@ -2057,20 +2094,16 @@ static int gfx_v8_0_sw_init(void *handle) static int gfx_v8_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_compute_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); @@ -3236,6 +3269,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) dev_warn(adev->dev, "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", adev->asic_type); + /* fall through */ case CHIP_CARRIZO: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | @@ -3435,9 +3469,9 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, } static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - vi_srbm_select(adev, me, pipe, q, 0); + vi_srbm_select(adev, me, pipe, q, vm); } static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -3705,6 +3739,33 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) } vi_srbm_select(adev, 0, 0, 0, 0); 
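 /* Selecting back to VMID 0 restores the default SRBM state before
  * srbm_mutex is dropped; the GDS/GWS/OA clearing added below writes the
  * per-VMID amdgpu_gds_reg_offset[] registers directly and does not rely
  * on the SRBM VMID selection. */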
mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[i].gws, 0); + WREG32(amdgpu_gds_reg_offset[i].oa, 0); + } +} + +static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } } static void gfx_v8_0_config_init(struct amdgpu_device *adev) @@ -3773,6 +3834,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v8_0_init_compute_vmid(adev); + gfx_v8_0_init_gds_vmid(adev); mutex_lock(&adev->grbm_idx_mutex); /* @@ -3924,11 +3986,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) int list_size; unsigned int *register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + kmemdup(adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); if (!register_list_format) return -ENOMEM; - memcpy(register_list_format, adev->gfx.rlc.register_list_format, - adev->gfx.rlc.reg_list_format_size_bytes); gfx_v8_0_parse_ind_reg_list(register_list_format, RLC_FormatDirectRegListLength, @@ -4776,6 +4837,10 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); + r = gfx_v8_0_csb_vram_pin(adev); + if (r) + return r; + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4892,6 +4957,9 @@ static int gfx_v8_0_hw_fini(void *handle) else pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); + + gfx_v8_0_csb_vram_unpin(adev); + return 0; } @@ -6216,7 +6284,7 @@ static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, struct amdgpu_ring *iring; mutex_lock(&adev->gfx.pipe_reserve_mutex); - pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); if (acquire) set_bit(pipe, adev->gfx.pipe_reserve_bitmap); else @@ -6235,20 +6303,20 @@ static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, /* Lower all pipes without a current reservation */ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { iring = &adev->gfx.gfx_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v8_0_ring_set_pipe_percent(iring, reserve); } for (i = 0; i < adev->gfx.num_compute_rings; ++i) { iring = &adev->gfx.compute_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v8_0_ring_set_pipe_percent(iring, reserve); } @@ -6536,7 +6604,7 @@ static int gfx_v8_0_set_eop_interrupt_state(struct 
amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: @@ -7009,30 +7077,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a11db2b1a63f..faf2ffce5837 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -20,17 +20,23 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" #include "amdgpu_atomfirmware.h" +#include "amdgpu_pm.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" + #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" @@ -40,6 +46,8 @@ #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" +#include "amdgpu_ras.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -52,6 +60,9 @@ #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L +#define mmGCEA_PROBE_MAP 0x070c +#define mmGCEA_PROBE_MAP_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); MODULE_FIRMWARE("amdgpu/vega10_me.bin"); @@ -94,6 +105,410 @@ MODULE_FIRMWARE("amdgpu/raven2_me.bin"); MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); +MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); +MODULE_FIRMWARE("amdgpu/renoir_me.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); +MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); + +#define mmTCP_CHAN_STEER_0_ARCT 0x0b03 +#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_1_ARCT 0x0b04 +#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_2_ARCT 0x0b09 +#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a +#define 
mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b +#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c +#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 + +struct ras_gfx_subblock_reg { + const char *name; + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t sec_count_mask; + uint32_t sec_count_shift; + uint32_t ded_count_mask; + uint32_t ded_count_shift; +}; + +enum ta_ras_gfx_subblock { + /*CPC*/ + TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, + TA_RAS_BLOCK__GFX_CPC_UCODE, + TA_RAS_BLOCK__GFX_DC_STATE_ME1, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, + TA_RAS_BLOCK__GFX_DC_STATE_ME2, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF*/ + TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, + TA_RAS_BLOCK__GFX_CPF_TAG, + TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, + /* CPG*/ + TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_TAG, + TA_RAS_BLOCK__GFX_CPG_TAG, + TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, + /* GDS*/ + TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI*/ + TA_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_LDS_D, + TA_RAS_BLOCK__GFX_SQ_LDS_I, + TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges)*/ + TA_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0*/ + TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, + TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + TA_RAS_BLOCK__GFX_SQC_INDEX0_START, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_INDEX0_END = + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1*/ + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX1_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2*/ + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + 
TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX2_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA*/ + TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_AFIFO, + TA_RAS_BLOCK__GFX_TA_FL_LFIFO, + TA_RAS_BLOCK__GFX_TA_FX_LFIFO, + TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA*/ + TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges)*/ + TA_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0*/ + TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1*/ + TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + TA_RAS_BLOCK__GFX_TCC_INDEX1_END = + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2*/ + TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + TA_RAS_BLOCK__GFX_TCC_INDEX2_END = + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3*/ + TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_INDEX3_END = + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4*/ + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX4_END = + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI*/ + TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP*/ + TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, + TA_RAS_BLOCK__GFX_TCP_VM_FIFO, + TA_RAS_BLOCK__GFX_TCP_DB_RAM, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD*/ + TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + TA_RAS_BLOCK__GFX_TD_CS_FIFO, + TA_RAS_BLOCK__GFX_TD_INDEX_END = 
TA_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges)*/ + TA_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0*/ + TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1*/ + TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2*/ + TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank*/ + TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker*/ + TA_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + TA_RAS_BLOCK__GFX_MAX +}; + +struct ras_gfx_subblock { + unsigned char *name; + int ta_subblock; + int hw_supported_error_type; + int sw_supported_error_type; +}; + +#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ + [AMDGPU_RAS_BLOCK__##subblock] = { \ + #subblock, \ + TA_RAS_BLOCK__##subblock, \ + ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ + (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ + } + +static const struct ras_gfx_subblock ras_gfx_subblocks[] = { + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 
1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 
0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), +}; static const struct soc15_reg_golden golden_settings_gc_9_0[] = { @@ -114,9 +529,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -179,9 +594,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = @@ -218,6 +633,22 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), }; +static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), +}; + static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), @@ -257,9 +688,23 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) +}; + +static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 
0x3fffffff, 0x346f0a4e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -300,17 +745,22 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); +static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -328,6 +778,11 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_0_vg20, ARRAY_SIZE(golden_settings_gc_9_0_vg20)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_gc_9_4_1_arct, + ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, ARRAY_SIZE(golden_settings_gc_9_1)); @@ -340,12 +795,18 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_1_rv1, ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; + case CHIP_RENOIR: + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rn, + ARRAY_SIZE(golden_settings_gc_9_1_rn)); + return; /* for renoir, don't need common goldensetting */ default: break; } - soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, - (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); + if (adev->asic_type != CHIP_ARCTURUS) + soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, + (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) @@ -415,7 +876,7 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -526,6 +987,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; + if ((adev->gfx.mec_fw_version < 0x000001a5) || + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46)) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay 
in cp firmware\n"); + switch (adev->asic_type) { case CHIP_VEGA10: if ((adev->gfx.me_fw_version >= 0x0000009c) && @@ -576,43 +1044,51 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) } } -static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { - const char *chip_name; - char fw_name[30]; - int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; - const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; - uint16_t version_major; - uint16_t version_minor; - - DRM_DEBUG("\n"); - switch (adev->asic_type) { case CHIP_VEGA10: - chip_name = "vega10"; - break; case CHIP_VEGA12: - chip_name = "vega12"; - break; case CHIP_VEGA20: - chip_name = "vega20"; break; case CHIP_RAVEN: - if (adev->rev_id >= 8) - chip_name = "raven2"; - else if (adev->pdev->device == 0x15d8) - chip_name = "picasso"; - else - chip_name = "raven"; + /* Disable GFXOFF on original raven. There are combinations + * of sbios and platforms that are not stable. + */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; + case CHIP_RENOIR: + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; break; default: - BUG(); + break; } +} + +static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -647,6 +1123,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware 
\"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; + uint32_t smu_version; + /* * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin * instead of picasso_rlc.bin. @@ -659,6 +1187,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && + (smu_version >= 0x41e2b)) + /** + *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. + */ + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); else snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); @@ -715,57 +1249,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) if (adev->gfx.rlc.is_rlc_v2_1) gfx_v9_0_init_rlc_ext_microcode(adev); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.mec_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); - if (!err) { - err = amdgpu_ucode_validate(adev->gfx.mec2_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); - } else { - err = 0; - adev->gfx.mec2_fw = NULL; - } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 
info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; @@ -795,7 +1279,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); } + } +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -818,29 +1353,28 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - } + /* TODO: Determine if MEC2 JT FW loading can be removed + for all GFX V9 asic and above */ + if (adev->asic_type != CHIP_ARCTURUS && + adev->asic_type != CHIP_RENOIR) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + } + } } out: + gfx_v9_0_check_if_need_gfxoff(adev); gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->gfx.pfp_fw); - adev->gfx.pfp_fw = NULL; - release_firmware(adev->gfx.me_fw); - adev->gfx.me_fw = NULL; - release_firmware(adev->gfx.ce_fw); - adev->gfx.ce_fw = NULL; - release_firmware(adev->gfx.rlc_fw); - adev->gfx.rlc_fw = NULL; release_firmware(adev->gfx.mec_fw); adev->gfx.mec_fw = NULL; release_firmware(adev->gfx.mec2_fw); @@ -849,6 +1383,59 @@ out: return err; } +static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + int r; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case 
CHIP_VEGA12: + chip_name = "vega12"; + break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; + default: + BUG(); + } + + /* No CPG in Arcturus */ + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); + if (r) + return r; + } + + r = gfx_v9_0_init_rlc_microcode(adev, chip_name); + if (r) + return r; + + r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); + if (r) + return r; + + return r; +} + static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) { u32 count = 0; @@ -1086,7 +1673,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return r; } - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ r = amdgpu_gfx_rlc_init_cpt(adev); @@ -1271,9 +1858,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - soc15_grbm_select(adev, me, pipe, q, 0); + soc15_grbm_select(adev, me, pipe, q, vm); } static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { @@ -1282,7 +1869,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .read_wave_data = &gfx_v9_0_read_wave_data, .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, - .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_0_ras_error_inject, + .query_ras_error_count = &gfx_v9_0_query_ras_error_count }; static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -1335,6 +1924,26 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) else gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; + case CHIP_ARCTURUS: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + break; + case CHIP_RENOIR: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22010042; + break; default: BUG(); break; @@ -1380,191 +1989,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, - struct amdgpu_ngg_buf *ngg_buf, - int size_se, - int default_size_se) -{ - int r; - - if (size_se < 0) { - dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); - return -EINVAL; - } - size_se = size_se ? 
size_se : default_size_se; - - ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; - r = amdgpu_bo_create_kernel(adev, ngg_buf->size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &ngg_buf->bo, - &ngg_buf->gpu_addr, - NULL); - if (r) { - dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); - return r; - } - ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); - - return r; -} - -static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < NGG_BUF_MAX; i++) - amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, - &adev->gfx.ngg.buf[i].gpu_addr, - NULL); - - memset(&adev->gfx.ngg.buf[0], 0, - sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); - - adev->gfx.ngg.init = false; - - return 0; -} - -static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) -{ - int r; - - if (!amdgpu_ngg || adev->gfx.ngg.init == true) - return 0; - - /* GDS reserve memory: 64 bytes alignment */ - adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; - adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); - adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); - - /* Primitive Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], - amdgpu_prim_buf_per_se, - 64 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Primitive Buffer\n"); - goto err; - } - - /* Position Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], - amdgpu_pos_buf_per_se, - 256 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Position Buffer\n"); - goto err; - } - - /* Control Sideband */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], - amdgpu_cntl_sb_buf_per_se, - 256); - if (r) { - dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); - goto err; - } - - /* Parameter Cache, not created by default */ - if (amdgpu_param_buf_per_se <= 0) - goto out; - - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], - amdgpu_param_buf_per_se, - 512 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Parameter Cache\n"); - goto err; - } - -out: - adev->gfx.ngg.init = true; - return 0; -err: - gfx_v9_0_ngg_fini(adev); - return r; -} - -static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; - int r; - u32 data, base; - - if (!amdgpu_ngg) - return 0; - - /* Program buffer size */ - data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, - adev->gfx.ngg.buf[NGG_PRIM].size >> 8); - data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, - adev->gfx.ngg.buf[NGG_POS].size >> 8); - WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); - - data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, - adev->gfx.ngg.buf[NGG_CNTL].size >> 8); - data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, - adev->gfx.ngg.buf[NGG_PARAM].size >> 10); - WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); - - /* Program buffer base address */ - base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); - data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); - data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); - - base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); - data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, 
mmWD_POS_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); - data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); - - base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); - data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); - data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); - - /* Clear GDS reserved memory */ - r = amdgpu_ring_alloc(ring, 17); - if (r) { - DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", - ring->name, r); - return r; - } - - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.mem.total_size + - adev->gfx.ngg.gds_reserve_size)); - - amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); - amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | - PACKET3_DMA_DATA_DST_SEL(1) | - PACKET3_DMA_DATA_SRC_SEL(2))); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | - adev->gfx.ngg.gds_reserve_size); - - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); - - amdgpu_ring_commit(ring); - - return 0; -} - static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int mec, int pipe, int queue) { @@ -1612,6 +2036,8 @@ static int gfx_v9_0_sw_init(void *handle) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: adev->gfx.mec.num_mec = 2; break; default: @@ -1639,6 +2065,18 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + /* ECC error */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, + &adev->gfx.cp_ecc_error_irq); + if (r) + return r; + + /* FUE error */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, + &adev->gfx.cp_ecc_error_irq); + if (r) + return r; + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; gfx_v9_0_scratch_init(adev); @@ -1672,7 +2110,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -1708,7 +2146,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); if (r) return r; @@ -1718,10 +2156,6 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; - r = gfx_v9_0_ngg_init(adev); - if (r) - return r; - return 0; } @@ -1731,25 +2165,20 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + amdgpu_gfx_ras_fini(adev); for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - 
amdgpu_gfx_compute_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); - gfx_v9_0_ngg_fini(adev); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (adev->asic_type == CHIP_RAVEN) { + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); @@ -1784,7 +2213,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1852,11 +2281,38 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); + } +} + +static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
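+	 * On gfx v9 the per-VMID GDS base/size registers are spaced two
+	 * registers apart, hence the 2 * vmid offset below; GDS_GWS_VMID0 and
+	 * GDS_OA_VMID0 advance by one register per VMID.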
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } } static void gfx_v9_0_constants_init(struct amdgpu_device *adev) @@ -1864,7 +2320,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_0_tiling_mode_table_init(adev); @@ -1875,23 +2331,27 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!amdgpu_noretry); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!amdgpu_noretry); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, (adev->gmc.private_aperture_start >> 48)); tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, (adev->gmc.shared_aperture_start >> 48)); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); } } soc15_grbm_select(adev, 0, 0, 0, 0); @@ -1899,25 +2359,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v9_0_init_compute_vmid(adev); - - mutex_lock(&adev->grbm_idx_mutex); - /* - * making sure that the following register writes will be broadcasted - * to all the shaders - */ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - - WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, - (adev->gfx.config.sc_prim_fifo_size_frontend << - PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_prim_fifo_size_backend << - PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_hiz_tile_fifo_size << - PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_earlyz_tile_fifo_size << - PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); - mutex_unlock(&adev->grbm_idx_mutex); - + gfx_v9_0_init_gds_vmid(adev); } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -1974,11 +2416,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { /* csib */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } @@ 
-2032,11 +2474,10 @@ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) u32 tmp = 0; u32 *register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + kmemdup(adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); if (!register_list_format) return -ENOMEM; - memcpy(register_list_format, adev->gfx.rlc.register_list_format, - adev->gfx.rlc.reg_list_format_size_bytes); /* setup unique_indirect_regs array and indirect_start_offsets array */ unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); @@ -2303,7 +2744,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) { - gfx_v9_1_init_rlc_save_restore_list(adev); + if (adev->asic_type == CHIP_VEGA12 || + (adev->asic_type == CHIP_RAVEN && + adev->rev_id >= 8)) + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); } @@ -2448,7 +2892,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].sched.ready = false; } - WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); } @@ -2646,9 +3090,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) int i; if (enable) { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); } else { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].sched.ready = false; @@ -2709,9 +3153,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -2795,6 +3239,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se1 = 0xffffffff; mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_static_thread_mgmt_se4 = 0xffffffff; + mqd->compute_static_thread_mgmt_se5 = 0xffffffff; + mqd->compute_static_thread_mgmt_se6 = 0xffffffff; + mqd->compute_static_thread_mgmt_se7 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; mqd->dynamic_cu_mask_addr_lo = @@ -2929,67 +3377,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? 
*/ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ @@ -3000,23 +3448,23 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) (adev->doorbell_index.userqueue_end * 2) << 2); } - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -3033,7 +3481,7 @@ 
static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) @@ -3045,21 +3493,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); return 0; } @@ -3198,10 +3646,12 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) gfx_v9_0_enable_gui_idle_interrupt(adev, false); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy firmware loading */ - r = gfx_v9_0_cp_gfx_load_microcode(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + /* legacy firmware loading */ + r = gfx_v9_0_cp_gfx_load_microcode(adev); + if (r) + return r; + } r = gfx_v9_0_cp_compute_load_microcode(adev); if (r) @@ -3212,18 +3662,22 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - r = gfx_v9_0_cp_gfx_resume(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_cp_gfx_resume(adev); + if (r) + return r; + } r = gfx_v9_0_kcq_resume(adev); if (r) return r; - ring = &adev->gfx.gfx_ring[0]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3237,7 +3691,8 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { - gfx_v9_0_cp_gfx_enable(adev, enable); + if (adev->asic_type != CHIP_ARCTURUS) + gfx_v9_0_cp_gfx_enable(adev, enable); gfx_v9_0_cp_compute_enable(adev, enable); } @@ -3246,7 +3701,8 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v9_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + gfx_v9_0_init_golden_registers(adev); gfx_v9_0_constants_init(adev); @@ -3262,10 +3718,6 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; - r = gfx_v9_0_ngg_en(adev); - if (r) - return r; - return r; } @@ -3303,11 +3755,14 @@ static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + 
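	/*
	 * Editor's note, not part of the upstream patch: the interrupt
	 * references taken during init are dropped first; the KCQ disable that
	 * follows is deliberately skipped once a fatal RAS interrupt has been
	 * flagged, because the DF freeze and the KCQ dequeue request can no
	 * longer complete at that point.
	 */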
amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + /* DF freeze and kcq disable will fail */ + if (!amdgpu_ras_intr_triggered()) + /* disable KCQ to avoid CPC touch memory not valid anymore */ + gfx_v9_0_kcq_disable(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3411,8 +3866,9 @@ static int gfx_v9_0_soft_reset(void *handle) /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); - /* Disable GFX parsing/prefetching */ - gfx_v9_0_cp_gfx_enable(adev, false); + if (adev->asic_type != CHIP_ARCTURUS) + /* Disable GFX parsing/prefetching */ + gfx_v9_0_cp_gfx_enable(adev, false); /* Disable MEC parsing/prefetching */ gfx_v9_0_cp_compute_enable(adev, false); @@ -3442,9 +3898,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + if (i != 0) + udelay(1); + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } @@ -3478,11 +3947,292 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static const u32 vgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x000000f8, 0xbf110800, + 0x7e000280, 0x7e020280, + 0x7e040280, 0x7e060280, + 0x7e080280, 0x7e0a0280, + 0x7e0c0280, 0x7e0e0280, + 0x80808800, 0xbe803200, + 0xbf84fff5, 0xbf9c0000, + 0xd28c0001, 0x0001007f, + 0xd28d0001, 0x0002027e, + 0x10020288, 0xb8810904, + 0xb7814000, 0xd1196a01, + 0x00000301, 0xbe800087, + 0xbefc00c1, 0xd89c4000, + 0x00020201, 0xd89cc080, + 0x00040401, 0x320202ff, + 0x00000800, 0x80808100, + 0xbf84fff8, 0x7e020280, + 0xbf810000, 0x00000000, +}; + +static const u32 sgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x0000005f, 0xbee50080, + 0xbe812c65, 0xbe822c65, + 0xbe832c65, 0xbe842c65, + 0xbe852c65, 0xb77c0005, + 0x80808500, 0xbf84fff8, + 0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 
0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ +}; + +static const struct soc15_reg_entry sgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sec_ded_counter_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, +}; + +static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + int i, r; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + r = amdgpu_ring_alloc(ring, 7); + if (r) { + DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", + ring->name, r); + return r; + } + + WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); + WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); + + amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); + amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_DST_SEL(1) | + 
PACKET3_DMA_DATA_SRC_SEL(2) | + PACKET3_DMA_DATA_ENGINE(0))); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | + adev->gds.gds_size); + + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); + + return r; +} + +static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + int r, i, j, k; + unsigned total_size, vgpr_offset, sgpr_offset; + u64 gpu_addr; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + /* bail if the compute ring is not ready */ + if (!ring->sched.ready) + return 0; + + total_size = + ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size = ALIGN(total_size, 256); + vgpr_offset = total_size; + total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); + sgpr_offset = total_size; + total_size += sizeof(sgpr_init_compute_shader); + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, total_size, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) + ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; + + for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) + ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; + + /* init the ib length to 0 */ + ib.length_dw = 0; + + /* VGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + 
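	/*
	 * Editor's note, not part of the upstream patch: as in the VGPR pass
	 * above, the shader base programmed into COMPUTE_PGM_LO/HI is the IB
	 * GPU address plus the shader offset shifted right by 8, since those
	 * registers take the address in 256-byte units.
	 */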
gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* shedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(f, false); + if (r) { + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + goto fail; + } + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { + for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { + for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + +fail: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); + + return r; +} + static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + if (adev->asic_type == CHIP_ARCTURUS) + adev->gfx.num_gfx_rings = 0; + else + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); @@ -3492,6 +4242,27 @@ static int gfx_v9_0_early_init(void *handle) return 0; } +static int gfx_v9_0_ecc_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_gfx_ras_late_init(adev); + if (r) + return r; + + r = gfx_v9_0_do_edc_gds_workarounds(adev); + if (r) + return r; + + /* requires IBs so do in late init after IB pool is initialized */ + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) + return r; + + return 0; +} + static int gfx_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3505,6 +4276,10 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = gfx_v9_0_ecc_late_init(handle); + if (r) + return r; + return 0; } @@ -3556,7 +4331,8 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); } else { gfx_v9_0_enable_gfx_cg_power_gating(adev, false); - gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) + gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } amdgpu_gfx_rlc_exit_safe_mode(adev); @@ -3661,6 +4437,9 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (adev->asic_type == CHIP_ARCTURUS) + return; + amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ @@ -3726,8 +4505,12 @@ static void 
gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->asic_type == CHIP_ARCTURUS) + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + else + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; @@ -3799,6 +4582,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: if (!enable) { amdgpu_gfx_off_ctrl(adev, false); cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -3853,6 +4637,8 @@ static int gfx_v9_0_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; @@ -3894,14 +4680,16 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; - /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + if (adev->asic_type != CHIP_ARCTURUS) { + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; - /* AMD_CG_SUPPORT_GFX_3D_CGLS */ - if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + } } static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) @@ -3943,7 +4731,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -3963,8 +4751,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio_funcs->get_hdp_flush_req_offset(adev), - adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } @@ -4139,7 +4927,7 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, struct amdgpu_ring *iring; mutex_lock(&adev->gfx.pipe_reserve_mutex); - pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); if (acquire) set_bit(pipe, adev->gfx.pipe_reserve_bitmap); else @@ -4158,20 +4946,20 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, /* Lower all pipes without a current reservation */ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { iring = &adev->gfx.gfx_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, 
- 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v9_0_ring_set_pipe_percent(iring, reserve); } for (i = 0; i < adev->gfx.num_compute_rings; ++i) { iring = &adev->gfx.compute_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v9_0_ring_set_pipe_percent(iring, reserve); } @@ -4190,8 +4978,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); - WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -4433,7 +5221,7 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - WREG32(mmSQ_CMD, value); + WREG32_SOC15(GC, 0, mmSQ_CMD, value); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, @@ -4541,13 +5329,52 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, return 0; } +#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ + WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ + CP_ECC_ERROR_INT_ENABLE, 1) + +#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ + WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ + CP_ECC_ERROR_INT_ENABLE, 0) + +static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + CP_ECC_ERROR_INT_ENABLE, 0); + DISABLE_ECC_ON_ME_PIPE(1, 0); + DISABLE_ECC_ON_ME_PIPE(1, 1); + DISABLE_ECC_ON_ME_PIPE(1, 2); + DISABLE_ECC_ON_ME_PIPE(1, 3); + break; + + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + CP_ECC_ERROR_INT_ENABLE, 1); + ENABLE_ECC_ON_ME_PIPE(1, 0); + ENABLE_ECC_ON_ME_PIPE(1, 1); + ENABLE_ECC_ON_ME_PIPE(1, 2); + ENABLE_ECC_ON_ME_PIPE(1, 3); + break; + default: + break; + } + + return 0; +} + + static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: @@ -4657,6 +5484,739 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } + +static const struct ras_gfx_subblock_reg ras_subblock_regs[] = { + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) + }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) + }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), + 0, 0 + }, + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, 
mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), + 0, 0 + }, + { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) + }, + { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), + SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), + 0, 0 + }, + { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), + SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), + SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) + }, + { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), + SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) + }, + { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), + 0, 0 + }, + { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), + 0, 0 + }, + { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), + 0, 0 + }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) + }, + { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), + 0, 0 + }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) + }, + { "GDS_OA_PHY_PHY_CMD_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) + }, + { "GDS_OA_PHY_PHY_DATA_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), + 0, 0 + }, + { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) + }, + { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) + }, + { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) + }, + { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) + }, + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), + 0, 0 + }, + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) + }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), + 0, 0 + }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), + 0, 0 + }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), + 0, 0 + }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), + 0, 0 + }, + { 
"TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), + 0, 0 + }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) + }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) + }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) + }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) + }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) + }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), + 0, 0 + }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), + 0, 0 + }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), + 0, 0 + }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), + 0, 0 + }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), + 0, 0 + }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), + 0, 0 + }, + { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), + 0, 0 + }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), + 0, 0 + }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), + 0, 0 + }, + { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), + 0, 0 + }, + { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), + 0, 0 + }, + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), + 0, 0 + }, + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) + }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) + }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), + 0, 0 + }, 
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), + 0, 0 + }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), + 0, 0 + }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) + }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) + }, + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) + }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) + }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) + }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) + }, + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) + }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) + }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) + }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) + }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) + }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) + }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) + }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) + }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + 
SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) + }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) + }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), + 0, 0 + }, + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) + }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) + }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) + }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) + }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), + 0, 0 + }, + { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), + 0, 0 + }, + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) + }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, 
DRAMWR_CMDMEM_DED_COUNT) + }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) + }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) + }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) + }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0 + }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0 + }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0 + }, + { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) + }, + { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) + }, + { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) + }, + { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0 + }, + { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), + 0, 0 + }, + { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), + 0, 0 + }, + { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), + 0, 0 + }, + { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), + 0, 0 + } +}; + +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if) +{ + struct ras_inject_if *info = (struct ras_inject_if *)inject_if; + int ret; + struct ta_ras_trigger_error_input block_info = { 0 }; + + if (adev->asic_type != CHIP_VEGA20) + return -EINVAL; + + if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) + return -EINVAL; + + if (!ras_gfx_subblocks[info->head.sub_block_index].name) + return -EPERM; + + if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + block_info.block_id 
= amdgpu_ras_block_to_ta(info->head.block); + block_info.sub_block_index = + ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; + block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); + block_info.address = info->address; + block_info.value = info->value; + + mutex_lock(&adev->grbm_idx_mutex); + ret = psp_ras_trigger_error(&adev->psp, &block_info); + mutex_unlock(&adev->grbm_idx_mutex); + + return ret; +} + +static const char *vml2_mems[] = { + "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_0_4K_MEM0", + "UTC_VML2_BANK_CACHE_0_4K_MEM1", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_1_4K_MEM0", + "UTC_VML2_BANK_CACHE_1_4K_MEM1", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_2_4K_MEM0", + "UTC_VML2_BANK_CACHE_2_4K_MEM1", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_3_4K_MEM0", + "UTC_VML2_BANK_CACHE_3_4K_MEM1", +}; + +static const char *vml2_walker_mems[] = { + "UTC_VML2_CACHE_PDE0_MEM0", + "UTC_VML2_CACHE_PDE0_MEM1", + "UTC_VML2_CACHE_PDE1_MEM0", + "UTC_VML2_CACHE_PDE1_MEM1", + "UTC_VML2_CACHE_PDE2_MEM0", + "UTC_VML2_CACHE_PDE2_MEM1", + "UTC_VML2_RDIF_LOG_FIFO", +}; + +static const char *atc_l2_cache_2m_mems[] = { + "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", +}; + +static const char *atc_l2_cache_4k_mems[] = { + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", +}; + +static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, + struct ras_err_data *err_data) +{ + uint32_t i, data; + uint32_t sec_count, ded_count; + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); + + for (i = 0; i < 16; i++) { + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, 
mmVM_L2_MEM_ECC_CNT); + + sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < 7; i++) { + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); + + sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_walker_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_walker_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < 4; i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); + + sec_count = (data & 0x00006000L) >> 0xd; + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_2m_mems[i], sec_count); + err_data->ce_count += sec_count; + } + } + + for (i = 0; i < 32; i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); + + sec_count = (data & 0x00006000L) >> 0xd; + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_4k_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = (data & 0x00018000L) >> 0xf; + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_4k_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + + return 0; +} + +static int __get_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t se_id, uint32_t inst_id, uint32_t value, + uint32_t *sec_count, uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) { + if(ras_subblock_regs[i].reg_offset != reg->reg_offset || + ras_subblock_regs[i].seg != reg->seg || + ras_subblock_regs[i].inst != reg->inst) + continue; + + sec_cnt = (value & + ras_subblock_regs[i].sec_count_mask) >> + ras_subblock_regs[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", + ras_subblock_regs[i].name, + se_id, inst_id, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & + ras_subblock_regs[i].ded_count_mask) >> + ras_subblock_regs[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", + ras_subblock_regs[i].name, + se_id, inst_id, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i, j, k; + uint32_t reg_value; + + if (adev->asic_type != CHIP_VEGA20) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i 
= 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { + for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { + for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0, k); + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + if (reg_value) + __get_ras_error_count(&sec_ded_counter_registers[i], + j, k, reg_value, + &sec_count, &ded_count); + } + } + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; + + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_0_query_utc_edc_status(adev, err_data); + + return 0; +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -4680,7 +6240,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -4731,7 +6291,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -4766,7 +6326,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -4818,6 +6378,12 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { .process = gfx_v9_0_priv_inst_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { + .set = gfx_v9_0_set_cp_ecc_error_state, + .process = amdgpu_gfx_cp_ecc_error_irq, +}; + + static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -4828,6 +6394,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; + + adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ + adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; } static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) @@ -4837,6 +6406,8 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; default: @@ -4851,13 +6422,14 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: - adev->gds.mem.total_size = 0x1000; + case CHIP_ARCTURUS: + adev->gds.gds_size = 0x1000; break; default: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; } @@ -4875,34 +6447,17 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) else adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ break; + case CHIP_ARCTURUS: + adev->gds.gds_compute_max_wave_id = 
0xfff; + break; default: /* this really depends on the chip */ adev->gds.gds_compute_max_wave_id = 0x7ff; break; } - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; } static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, @@ -4939,12 +6494,21 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - unsigned disable_masks[4 * 2]; + unsigned disable_masks[4 * 4]; if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + /* + * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs + */ + if (adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se > 16) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -4953,11 +6517,23 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, ao_bitmap = 0; counter = 0; gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); - if (i < 4 && j < 2) - gfx_v9_0_set_user_cu_inactive_bitmap( - adev, disable_masks[i * 2 + j]); + gfx_v9_0_set_user_cu_inactive_bitmap( + adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + + /* + * The bitmap(and ao_cu_bitmap) in cu_info structure is + * 4x4 size array, and it's usually suitable for Vega + * ASICs which has 4*2 SE/SH layout. + * But for Arcturus, SE/SH layout is changed to 8*1. 
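+ * (That is, 8 shader engines with a single shader array each; the
+ * general index mapping used below is bitmap[se % 4][sh + se / 4].)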
+ * To mostly reduce the impact, we make it compatible + * with current bitmap array as below: + * SE4,SH0 --> bitmap[0][1] + * SE5,SH0 --> bitmap[1][1] + * SE6,SH0 --> bitmap[2][1] + * SE7,SH0 --> bitmap[3][1] + */ + cu_info->bitmap[i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { @@ -4970,7 +6546,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, active_cu_number += counter; if (i < 2 && j < 2) ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i][j] = ao_bitmap; + cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index f5edddf3b29d..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; /* Program the AGP BAR */ - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) @@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * workaround that increase system aperture high address (add 1) * to get rid of the VM fault and hardware hang. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ @@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); } static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) @@ -143,15 +143,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL, tmp); tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); + WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; if (adev->gmc.translate_further) { @@ -163,12 +163,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); } - WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); + WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); - WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp); + WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL4, tmp); } static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) @@ -178,6 +178,8 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); } @@ -236,7 +238,8 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); @@ -267,9 +270,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) * VF copy registers so vbios post doesn't program them, for * SRIOV driver need to program them */ - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); } @@ -303,7 +306,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); @@ -356,7 +359,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, @@ -364,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c new file mode 100644 index 000000000000..b70c7b483c24 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -0,0 +1,371 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "gfxhub_v2_0.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "gc/gc_10_1_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE); + + base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; +} + +void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ + int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); +} + +static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + + /* Disable AGP. */ + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + + +static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp); + + tmp = mmGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp); + + tmp = mmGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp); +} + +static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + 
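+ /* The low address programmed above (0xF_FFFFFFFF) is larger than the
+  * high address (0), so the identity aperture can never match an
+  * address and is effectively disabled; the physical offset written
+  * below is therefore left at zero. */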
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. 
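+ * The hub is brought up in stages below: GART and system apertures,
+ * TLB and L2 cache setup, then the system domain, identity aperture,
+ * per-VMID contexts and the invalidation engines.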
*/ + gfxhub_v2_0_init_gart_aperture_regs(adev); + gfxhub_v2_0_init_system_aperture_regs(adev); + gfxhub_v2_0_init_tlb_regs(adev); + gfxhub_v2_0_init_cache_regs(adev); + + gfxhub_v2_0_enable_system_domain(adev); + gfxhub_v2_0_disable_identity_aperture(adev); + gfxhub_v2_0_setup_vmid_config(adev); + gfxhub_v2_0_program_invalidation(adev); + + return 0; +} + +void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0); +} + +/** + * gfxhub_v2_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +void gfxhub_v2_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, 
mmGCVM_L2_PROTECTION_FAULT_CNTL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h new file mode 100644 index 000000000000..392b8cd94fc0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V2_0_H__ +#define __GFXHUB_V2_0_H__ + +u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev); +int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev); +void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev); +void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void gfxhub_v2_0_init(struct amdgpu_device *adev); +u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev); +void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c new file mode 100644 index 000000000000..321f8a997be8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -0,0 +1,1006 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include <linux/firmware.h> +#include <linux/pci.h> +#include "amdgpu.h" +#include "amdgpu_atomfirmware.h" +#include "gmc_v10_0.h" + +#include "hdp/hdp_5_0_0_offset.h" +#include "hdp/hdp_5_0_0_sh_mask.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "dcn/dcn_2_0_0_offset.h" +#include "dcn/dcn_2_0_0_sh_mask.h" +#include "oss/osssys_5_0_0_offset.h" +#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +#include "navi10_enum.h" + +#include "soc15.h" +#include "soc15_common.h" + +#include "nbio_v2_3.h" + +#include "gfxhub_v2_0.h" +#include "mmhub_v2_0.h" +#include "athub_v2_0.h" +/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/ +#define AMDGPU_NUM_OF_VMIDS 8 + +#if 0 +static const struct soc15_reg_golden golden_settings_navi10_hdp[] = +{ + /* TODO add golden setting for hdp */ +}; +#endif + +static int +gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, unsigned type, + enum amdgpu_interrupt_state state) +{ + struct amdgpu_vmhub *hub; + u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; + + bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB_0]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits[AMDGPU_MMHUB_0]; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits[AMDGPU_GFXHUB_0]; + WREG32(reg, tmp); + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB_0]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits[AMDGPU_MMHUB_0]; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits[AMDGPU_GFXHUB_0]; + WREG32(reg, tmp); + } + break; + default: + break; + } + + return 0; +} + +static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + uint32_t status = 0; + u64 addr; + + addr = (u64)entry->src_data[0] << 12; + addr |= ((u64)entry->src_data[1] & 0xf) << 44; + + if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM 
interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + } + + if (printk_ratelimit()) { + struct amdgpu_task_info task_info; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + + dev_err(adev->dev, + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " + "for process %s pid %d thread %s pid %d)\n", + entry->vmid_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) { + dev_err(adev->dev, + "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, RW)); + } + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = { + .set = gmc_v10_0_vm_fault_interrupt_state, + .process = gmc_v10_0_process_interrupt, +}; + +static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs; +} + +static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ + +static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + unsigned int vmhub, uint32_t flush_type) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); + /* Use register 17 for GART */ + const unsigned eng = 17; + unsigned int i; + + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + + /* Wait for ACK with a delay.*/ + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp &= 1 << vmid; + if (tmp) + break; + + udelay(1); + } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + + if (i < adev->usec_timeout) + return; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); +} + +/** + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * + * Flush the TLB for the requested page table. + */ +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct dma_fence *fence; + struct amdgpu_job *job; + + int r; + + /* flush hdp cache */ + adev->nbio.funcs->hdp_flush(adev, NULL); + + mutex_lock(&adev->mman.gtt_window_lock); + + if (vmhub == AMDGPU_MMHUB_0) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; + } + + BUG_ON(vmhub != AMDGPU_GFXHUB_0); + + if (!adev->mman.buffer_funcs_enabled || + !adev->ib_pool_ready || + adev->in_gpu_reset) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; + } + + /* The SDMA on Navi has a bug which can theoretically result in memory + * corruption if an invalidation happens at the same time as an VA + * translation. Avoid this by doing the invalidation from the SDMA + * itself. 
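+ * The dummy job built below is submitted with vm_needs_flush set, so
+ * the flush is emitted from the SDMA ring itself rather than through
+ * the direct MMIO path used for the MMHUB above.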
+ */ + r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job); + if (r) + goto error_alloc; + + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); + job->vm_needs_flush = true; + job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_submit; + + mutex_unlock(&adev->mman.gtt_window_lock); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + + return; + +error_submit: + amdgpu_job_free(job); + +error_alloc: + mutex_unlock(&adev->mman.gtt_window_lock); + DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); +} + +static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); + unsigned eng = ring->vm_inv_eng; + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + upper_32_bits(pd_addr)); + + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + + return pd_addr; +} + +static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; + else + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; + + amdgpu_ring_emit_wreg(ring, reg, pasid); +} + +/* + * PTE format on NAVI 10: + * 63:59 reserved + * 58:57 reserved + * 56 F + * 55 L + * 54 reserved + * 53:52 SW + * 51 T + * 50:48 mtype + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid + * + * PDE format on NAVI 10: + * 63:59 block fragment size + * 58:55 reserved + * 54 P + * 53:48 reserved + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid + */ + +static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) +{ + switch (flags) { + case AMDGPU_VM_MTYPE_DEFAULT: + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + case AMDGPU_VM_MTYPE_NC: + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + case AMDGPU_VM_MTYPE_WC: + return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); + case AMDGPU_VM_MTYPE_CC: + return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); + case AMDGPU_VM_MTYPE_UC: + return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + default: + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + } +} + +static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) +{ + if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) + *addr = adev->vm_manager.vram_base_offset + *addr - + adev->gmc.vram_start; + BUG_ON(*addr & 0xFFFF00000000003FULL); + + if (!adev->gmc.translate_further) + return; + + if (level == AMDGPU_VM_PDB1) { + /* Set the block fragment size */ + if (!(*flags & AMDGPU_PDE_PTE)) + *flags |= AMDGPU_PDE_BFS(0x9); + + } else if (level == AMDGPU_VM_PDB0) { + if (*flags & AMDGPU_PDE_PTE) + *flags &= ~AMDGPU_PDE_PTE; + else + *flags |= AMDGPU_PTE_TF; + } +} + +static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; + *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + + if (mapping->flags & AMDGPU_PTE_PRT) { + *flags |= AMDGPU_PTE_PRT; + *flags |= AMDGPU_PTE_SNOOPED; + *flags |= AMDGPU_PTE_LOG; + *flags |= AMDGPU_PTE_SYSTEM; + *flags &= ~AMDGPU_PTE_VALID; + } +} + +static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, + .map_mtype = gmc_v10_0_map_mtype, + .get_vm_pde = gmc_v10_0_get_vm_pde, + .get_vm_pte = gmc_v10_0_get_vm_pte +}; + +static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) +{ + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs; +} + +static int gmc_v10_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v10_0_set_gmc_funcs(adev); + gmc_v10_0_set_irq_funcs(adev); + + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + 
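+ /* The shared and private apertures each span 4 GB ((4ULL << 30) bytes)
+  * of the GPU virtual address space. */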
adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; + + return 0; +} + +static int gmc_v10_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 }; + unsigned i; + + for(i = 0; i < adev->num_rings; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + unsigned vmhub = ring->funcs->vmhub; + + ring->vm_inv_eng = vm_inv_eng[vmhub]++; + dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", + ring->idx, ring->name, ring->vm_inv_eng, + ring->funcs->vmhub); + } + + /* Engine 17 is used for GART flushes */ + for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) + BUG_ON(vm_inv_eng[i] > 17); + + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); +} + +static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, + struct amdgpu_gmc *mc) +{ + u64 base = 0; + + base = gfxhub_v2_0_get_fb_location(adev); + + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); + + /* base offset of vram pages */ + adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); +} + +/** + * gmc_v10_0_mc_init - initialize the memory controller driver params + * + * @adev: amdgpu_device pointer + * + * Look up the amount of vram, vram width, and decide how to place + * vram and gart within the GPU's physical address space. + * Returns 0 for success. + */ +static int gmc_v10_0_mc_init(struct amdgpu_device *adev) +{ + /* Could aper size report 0 ? */ + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); + + /* size in MB on si */ + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; + adev->gmc.visible_vram_size = adev->gmc.aper_size; + + /* In case the PCI BAR is larger than the actual amount of vram */ + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; + + /* set the gart size */ + if (amdgpu_gart_size == -1) { + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + default: + adev->gmc.gart_size = 512ULL << 20; + break; + } + } else + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; + + gmc_v10_0_vram_gtt_location(adev, &adev->gmc); + + return 0; +} + +static int gmc_v10_0_gart_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->gart.bo) { + WARN(1, "NAVI10 PCIE GART already initialized\n"); + return 0; + } + + /* Initialize common gart structure */ + r = amdgpu_gart_init(adev); + if (r) + return r; + + adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) | + AMDGPU_PTE_EXECUTABLE; + + return amdgpu_gart_table_vram_alloc(adev); +} + +static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport; + u32 pitch; + + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); + pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH); + size = (REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, 
PRI_VIEWPORT_HEIGHT) * + REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) { + DRM_ERROR("Warning: pre-OS buffer uses most of vram, \ + be aware of gart table overwrite\n"); + return 0; + } + + return size; +} + + + +static int gmc_v10_0_sw_init(void *handle) +{ + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gfxhub_v2_0_init(adev); + mmhub_v2_0_init(adev); + + spin_lock_init(&adev->gmc.invalidate_lock); + + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (!amdgpu_emu_mode) + adev->gmc.vram_width = vram_width; + else + adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->num_vmhubs = 2; + /* + * To fulfill 4-level page support, + * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, + * block size 512 (9bit) + */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; + default: + break; + } + + /* This interrupt is VMC page fault.*/ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, + VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, + UTCL2_1_0__SRCID__FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + + /* + * Set the internal MC address mask This is the max address of the GPU's + * internal address space. + */ + adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + + /* + * Reserve 8M stolen memory for navi10 like vega10 + * TODO: will check if it's really needed on asic. + */ + if (amdgpu_emu_mode == 1) + adev->gmc.stolen_size = 0; + else + adev->gmc.stolen_size = 9 * 1024 *1024; + + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); + if (r) { + printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + return r; + } + + r = gmc_v10_0_mc_init(adev); + if (r) + return r; + + adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev); + + /* Memory manager */ + r = amdgpu_bo_init(adev); + if (r) + return r; + + r = gmc_v10_0_gart_init(adev); + if (r) + return r; + + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + + amdgpu_vm_manager_init(adev); + + return 0; +} + +/** + * gmc_v8_0_gart_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tears down the driver GART/VM setup (CIK). 
+ */ +static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) +{ + amdgpu_gart_table_vram_free(adev); + amdgpu_gart_fini(adev); +} + +static int gmc_v10_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_vm_manager_fini(adev); + gmc_v10_0_gart_fini(adev); + amdgpu_gem_force_release(adev); + amdgpu_bo_fini(adev); + + return 0; +} + +static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + break; + default: + break; + } +} + +/** + * gmc_v10_0_gart_enable - gart enable + * + * @adev: amdgpu_device pointer + */ +static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) +{ + int r; + bool value; + u32 tmp; + + if (adev->gart.bo == NULL) { + dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + + r = amdgpu_gart_table_vram_pin(adev); + if (r) + return r; + + r = gfxhub_v2_0_gart_enable(adev); + if (r) + return r; + + r = mmhub_v2_0_gart_enable(adev); + if (r) + return r; + + tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); + tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; + WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); + + tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); + WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + + /* Flush HDP after it is initialized */ + adev->nbio.funcs->hdp_flush(adev, NULL); + + value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? + false : true; + + gfxhub_v2_0_set_fault_enable_default(adev, value); + mmhub_v2_0_set_fault_enable_default(adev, value); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(adev->gmc.gart_size >> 20), + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); + + adev->gart.ready = true; + + return 0; +} + +static int gmc_v10_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* The sequence of these two function calls matters.*/ + gmc_v10_0_init_golden_registers(adev); + + r = gmc_v10_0_gart_enable(adev); + if (r) + return r; + + return 0; +} + +/** + * gmc_v10_0_gart_disable - gart disable + * + * @adev: amdgpu_device pointer + * + * This disables all VM page table. 
+ */ +static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) +{ + gfxhub_v2_0_gart_disable(adev); + mmhub_v2_0_gart_disable(adev); + amdgpu_gart_table_vram_unpin(adev); +} + +static int gmc_v10_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) { + /* full access mode, so don't touch any GMC register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + gmc_v10_0_gart_disable(adev); + + return 0; +} + +static int gmc_v10_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v10_0_hw_fini(adev); + + return 0; +} + +static int gmc_v10_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gmc_v10_0_hw_init(adev); + if (r) + return r; + + amdgpu_vmid_reset_all(adev); + + return 0; +} + +static bool gmc_v10_0_is_idle(void *handle) +{ + /* MC is always ready in GMC v10.*/ + return true; +} + +static int gmc_v10_0_wait_for_idle(void *handle) +{ + /* There is no need to wait for MC idle in GMC v10.*/ + return 0; +} + +static int gmc_v10_0_soft_reset(void *handle) +{ + return 0; +} + +static int gmc_v10_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = mmhub_v2_0_set_clockgating(adev, state); + if (r) + return r; + + return athub_v2_0_set_clockgating(adev, state); +} + +static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mmhub_v2_0_get_clockgating(adev, flags); + + athub_v2_0_get_clockgating(adev, flags); +} + +static int gmc_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gmc_v10_0_ip_funcs = { + .name = "gmc_v10_0", + .early_init = gmc_v10_0_early_init, + .late_init = gmc_v10_0_late_init, + .sw_init = gmc_v10_0_sw_init, + .sw_fini = gmc_v10_0_sw_fini, + .hw_init = gmc_v10_0_hw_init, + .hw_fini = gmc_v10_0_hw_fini, + .suspend = gmc_v10_0_suspend, + .resume = gmc_v10_0_resume, + .is_idle = gmc_v10_0_is_idle, + .wait_for_idle = gmc_v10_0_wait_for_idle, + .soft_reset = gmc_v10_0_soft_reset, + .set_clockgating_state = gmc_v10_0_set_clockgating_state, + .set_powergating_state = gmc_v10_0_set_powergating_state, + .get_clockgating_state = gmc_v10_0_get_clockgating_state, +}; + +const struct amdgpu_ip_block_version gmc_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &gmc_v10_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h new file mode 100644 index 000000000000..7daa53d8996c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GMC_V10_0_H__ +#define __GMC_V10_0_H__ + +extern const struct amd_ip_funcs gmc_v10_0_ip_funcs; +extern const struct amdgpu_ip_block_version gmc_v10_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 9fc3296592fe..b205039350b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -20,8 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include <drm/drm_cache.h> #include "amdgpu.h" #include "gmc_v6_0.h" @@ -225,7 +228,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); } @@ -359,8 +362,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) return 0; } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } @@ -383,41 +386,20 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - value = addr & 0xFFFFFFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - -static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + static void 
gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -582,7 +564,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_flush_gpu_tlb(adev, 0, 0); + gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -850,9 +832,10 @@ static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v6_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -873,20 +856,12 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); - } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(44); r = gmc_v6_0_init_microcode(adev); if (r) { @@ -1169,10 +1144,9 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, - .set_pte_pde = gmc_v6_0_set_pte_pde, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, - .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags + .get_vm_pte = gmc_v6_0_get_vm_pte, }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 761dcfb2fec0..f08e5330642d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -20,8 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include <drm/drm_cache.h> #include "amdgpu.h" #include "cikd.h" @@ -242,7 +245,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); } @@ -430,8 +433,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (CIK). 
*/ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -460,52 +463,20 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -/** - * gmc_v7_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags - * - * Update the page tables using the CPU. - */ -static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - value = addr & 0xFFFFFFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - -static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -699,7 +670,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_flush_gpu_tlb(adev, 0, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -981,9 +952,10 @@ static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v7_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -1012,25 +984,12 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 40-bits. - * IGP - can handle 40-bits - * PCI - dma32 for legacy pci gart, 40 bits on newer asics - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 
32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; pr_warn("amdgpu: No suitable DMA available\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - pr_warn("amdgpu: No coherent DMA available\n"); - } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v7_0_init_microcode(adev); if (r) { @@ -1376,10 +1335,9 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, - .set_pte_pde = gmc_v7_0_set_pte_pde, .set_prt = gmc_v7_0_set_prt, - .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, - .get_vm_pde = gmc_v7_0_get_vm_pde + .get_vm_pde = gmc_v7_0_get_vm_pde, + .get_vm_pte = gmc_v7_0_get_vm_pte }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 34440672f938..6d96d40fbcb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -20,8 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include <drm/drm_cache.h> #include "amdgpu.h" #include "gmc_v8_0.h" @@ -289,7 +292,7 @@ out: * * @adev: amdgpu_device pointer * - * Load the GDDR MC ucode into the hw (CIK). + * Load the GDDR MC ucode into the hw (VI). * Returns 0 on success, error on failure. */ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) @@ -433,7 +436,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); } @@ -443,7 +446,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Set the location of vram, gart, and AGP in the GPU's - * physical address space (CIK). + * physical address space (VI). */ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { @@ -515,7 +518,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Look up the amount of vram, vram width, and decide how to place - * vram and gart within the GPU's physical address space (CIK). + * vram and gart within the GPU's physical address space (VI). * Returns 0 for success. */ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) @@ -630,10 +633,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @vmid: vm instance to flush * - * Flush the TLB for the requested page table (CIK). + * Flush the TLB for the requested page table (VI). 
*/ -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -662,67 +665,26 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -/** - * gmc_v8_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags +/* + * PTE format on VI: + * 63:40 reserved + * 39:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 reserved + * 2 snooped + * 1 system + * 0 valid * - * Update the page tables using the CPU. + * PDE format on VI: + * 63:59 block fragment size + * 58:40 reserved + * 39:1 physical base address of PTE + * bits 5:1 must be 0. + * 0 valid */ -static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - /* - * PTE format on VI: - * 63:40 reserved - * 39:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 reserved - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on VI: - * 63:59 block fragment size - * 58:40 reserved - * 39:1 physical base address of PTE - * bits 5:1 must be 0. - * 0 valid - */ - value = addr & 0x000000FFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - -static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) @@ -730,6 +692,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -824,7 +795,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) * This sets up the TLBs, programs the page tables for VMID0, * sets up the hw for VMIDs 1-15 which are allocated on * demand, and sets up the global locations for the LDS, GDS, - * and GPUVM for FSA64 clients (CIK). + * and GPUVM for FSA64 clients (VI). * Returns 0 for success, errors for failure. 
*/ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) @@ -942,7 +913,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_flush_gpu_tlb(adev, 0, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -972,7 +943,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * This disables all VM page table (CIK). + * This disables all VM page table (VI). */ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) { @@ -1002,7 +973,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * - * Print human readable fault information (CIK). + * Print human readable fault information (VI). */ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client, unsigned pasid) @@ -1100,9 +1071,10 @@ static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v8_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -1137,25 +1109,12 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 40-bits. - * IGP - can handle 40-bits - * PCI - dma32 for legacy pci gart, 40 bits on newer asics - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; pr_warn("amdgpu: No suitable DMA available\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - pr_warn("amdgpu: No coherent DMA available\n"); - } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v8_0_init_microcode(adev); if (r) { @@ -1743,10 +1702,9 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, - .set_pte_pde = gmc_v8_0_set_pte_pde, .set_prt = gmc_v8_0_set_prt, - .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, - .get_vm_pde = gmc_v8_0_get_vm_pde + .get_vm_pde = gmc_v8_0_get_vm_pde, + .get_vm_pte = gmc_v8_0_get_vm_pte }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2fe8397241ea..3c355fb5d2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -20,8 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ + #include <linux/firmware.h> +#include <linux/pci.h> + #include <drm/drm_cache.h> + #include "amdgpu.h" #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" @@ -43,10 +47,17 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "athub_v1_0.h" #include "gfxhub_v1_1.h" +#include "mmhub_v9_4.h" +#include "umc_v6_1.h" +#include "umc_v6_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" + /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 @@ -84,121 +95,156 @@ static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008) }; -/* Ecc related register addresses, (BASE + reg offset) */ -/* Universal Memory Controller caps (may be fused). */ -/* UMCCH:UmcLocalCap */ -#define UMCLOCALCAPS_ADDR0 (0x00014306 + 0x00000000) -#define UMCLOCALCAPS_ADDR1 (0x00014306 + 0x00000800) -#define UMCLOCALCAPS_ADDR2 (0x00014306 + 0x00001000) -#define UMCLOCALCAPS_ADDR3 (0x00014306 + 0x00001800) -#define UMCLOCALCAPS_ADDR4 (0x00054306 + 0x00000000) -#define UMCLOCALCAPS_ADDR5 (0x00054306 + 0x00000800) -#define UMCLOCALCAPS_ADDR6 (0x00054306 + 0x00001000) -#define UMCLOCALCAPS_ADDR7 (0x00054306 + 0x00001800) -#define UMCLOCALCAPS_ADDR8 (0x00094306 + 0x00000000) -#define UMCLOCALCAPS_ADDR9 (0x00094306 + 0x00000800) -#define UMCLOCALCAPS_ADDR10 (0x00094306 + 0x00001000) -#define UMCLOCALCAPS_ADDR11 (0x00094306 + 0x00001800) -#define UMCLOCALCAPS_ADDR12 (0x000d4306 + 0x00000000) -#define UMCLOCALCAPS_ADDR13 (0x000d4306 + 0x00000800) -#define UMCLOCALCAPS_ADDR14 (0x000d4306 + 0x00001000) -#define UMCLOCALCAPS_ADDR15 (0x000d4306 + 0x00001800) - -/* Universal Memory Controller Channel config. */ -/* UMCCH:UMC_CONFIG */ -#define UMCCH_UMC_CONFIG_ADDR0 (0x00014040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR1 (0x00014040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR2 (0x00014040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR3 (0x00014040 + 0x00001800) -#define UMCCH_UMC_CONFIG_ADDR4 (0x00054040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR5 (0x00054040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR6 (0x00054040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR7 (0x00054040 + 0x00001800) -#define UMCCH_UMC_CONFIG_ADDR8 (0x00094040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR9 (0x00094040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR10 (0x00094040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR11 (0x00094040 + 0x00001800) -#define UMCCH_UMC_CONFIG_ADDR12 (0x000d4040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR13 (0x000d4040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR14 (0x000d4040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR15 (0x000d4040 + 0x00001800) - -/* Universal Memory Controller Channel Ecc config. 
*/ -/* UMCCH:EccCtrl */ -#define UMCCH_ECCCTRL_ADDR0 (0x00014053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR1 (0x00014053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR2 (0x00014053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR3 (0x00014053 + 0x00001800) -#define UMCCH_ECCCTRL_ADDR4 (0x00054053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR5 (0x00054053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR6 (0x00054053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR7 (0x00054053 + 0x00001800) -#define UMCCH_ECCCTRL_ADDR8 (0x00094053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR9 (0x00094053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR10 (0x00094053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR11 (0x00094053 + 0x00001800) -#define UMCCH_ECCCTRL_ADDR12 (0x000d4053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR13 (0x000d4053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR14 (0x000d4053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR15 (0x000d4053 + 0x00001800) - -static const uint32_t ecc_umclocalcap_addrs[] = { - UMCLOCALCAPS_ADDR0, - UMCLOCALCAPS_ADDR1, - UMCLOCALCAPS_ADDR2, - UMCLOCALCAPS_ADDR3, - UMCLOCALCAPS_ADDR4, - UMCLOCALCAPS_ADDR5, - UMCLOCALCAPS_ADDR6, - UMCLOCALCAPS_ADDR7, - UMCLOCALCAPS_ADDR8, - UMCLOCALCAPS_ADDR9, - UMCLOCALCAPS_ADDR10, - UMCLOCALCAPS_ADDR11, - UMCLOCALCAPS_ADDR12, - UMCLOCALCAPS_ADDR13, - UMCLOCALCAPS_ADDR14, - UMCLOCALCAPS_ADDR15, +static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = { + (0x000143c0 + 0x00000000), + (0x000143c0 + 0x00000800), + (0x000143c0 + 0x00001000), + (0x000143c0 + 0x00001800), + (0x000543c0 + 0x00000000), + (0x000543c0 + 0x00000800), + (0x000543c0 + 0x00001000), + (0x000543c0 + 0x00001800), + (0x000943c0 + 0x00000000), + (0x000943c0 + 0x00000800), + (0x000943c0 + 0x00001000), + (0x000943c0 + 0x00001800), + (0x000d43c0 + 0x00000000), + (0x000d43c0 + 0x00000800), + (0x000d43c0 + 0x00001000), + (0x000d43c0 + 0x00001800), + (0x001143c0 + 0x00000000), + (0x001143c0 + 0x00000800), + (0x001143c0 + 0x00001000), + (0x001143c0 + 0x00001800), + (0x001543c0 + 0x00000000), + (0x001543c0 + 0x00000800), + (0x001543c0 + 0x00001000), + (0x001543c0 + 0x00001800), + (0x001943c0 + 0x00000000), + (0x001943c0 + 0x00000800), + (0x001943c0 + 0x00001000), + (0x001943c0 + 0x00001800), + (0x001d43c0 + 0x00000000), + (0x001d43c0 + 0x00000800), + (0x001d43c0 + 0x00001000), + (0x001d43c0 + 0x00001800), }; -static const uint32_t ecc_umcch_umc_config_addrs[] = { - UMCCH_UMC_CONFIG_ADDR0, - UMCCH_UMC_CONFIG_ADDR1, - UMCCH_UMC_CONFIG_ADDR2, - UMCCH_UMC_CONFIG_ADDR3, - UMCCH_UMC_CONFIG_ADDR4, - UMCCH_UMC_CONFIG_ADDR5, - UMCCH_UMC_CONFIG_ADDR6, - UMCCH_UMC_CONFIG_ADDR7, - UMCCH_UMC_CONFIG_ADDR8, - UMCCH_UMC_CONFIG_ADDR9, - UMCCH_UMC_CONFIG_ADDR10, - UMCCH_UMC_CONFIG_ADDR11, - UMCCH_UMC_CONFIG_ADDR12, - UMCCH_UMC_CONFIG_ADDR13, - UMCCH_UMC_CONFIG_ADDR14, - UMCCH_UMC_CONFIG_ADDR15, +static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { + (0x000143e0 + 0x00000000), + (0x000143e0 + 0x00000800), + (0x000143e0 + 0x00001000), + (0x000143e0 + 0x00001800), + (0x000543e0 + 0x00000000), + (0x000543e0 + 0x00000800), + (0x000543e0 + 0x00001000), + (0x000543e0 + 0x00001800), + (0x000943e0 + 0x00000000), + (0x000943e0 + 0x00000800), + (0x000943e0 + 0x00001000), + (0x000943e0 + 0x00001800), + (0x000d43e0 + 0x00000000), + (0x000d43e0 + 0x00000800), + (0x000d43e0 + 0x00001000), + (0x000d43e0 + 0x00001800), + (0x001143e0 + 0x00000000), + (0x001143e0 + 0x00000800), + (0x001143e0 + 0x00001000), + (0x001143e0 + 0x00001800), + (0x001543e0 + 0x00000000), + (0x001543e0 + 0x00000800), + (0x001543e0 + 0x00001000), + (0x001543e0 + 0x00001800), + (0x001943e0 + 
0x00000000), + (0x001943e0 + 0x00000800), + (0x001943e0 + 0x00001000), + (0x001943e0 + 0x00001800), + (0x001d43e0 + 0x00000000), + (0x001d43e0 + 0x00000800), + (0x001d43e0 + 0x00001000), + (0x001d43e0 + 0x00001800), }; -static const uint32_t ecc_umcch_eccctrl_addrs[] = { - UMCCH_ECCCTRL_ADDR0, - UMCCH_ECCCTRL_ADDR1, - UMCCH_ECCCTRL_ADDR2, - UMCCH_ECCCTRL_ADDR3, - UMCCH_ECCCTRL_ADDR4, - UMCCH_ECCCTRL_ADDR5, - UMCCH_ECCCTRL_ADDR6, - UMCCH_ECCCTRL_ADDR7, - UMCCH_ECCCTRL_ADDR8, - UMCCH_ECCCTRL_ADDR9, - UMCCH_ECCCTRL_ADDR10, - UMCCH_ECCCTRL_ADDR11, - UMCCH_ECCCTRL_ADDR12, - UMCCH_ECCCTRL_ADDR13, - UMCCH_ECCCTRL_ADDR14, - UMCCH_ECCCTRL_ADDR15, +static const uint32_t ecc_umc_mcumc_status_addrs[] = { + (0x000143c2 + 0x00000000), + (0x000143c2 + 0x00000800), + (0x000143c2 + 0x00001000), + (0x000143c2 + 0x00001800), + (0x000543c2 + 0x00000000), + (0x000543c2 + 0x00000800), + (0x000543c2 + 0x00001000), + (0x000543c2 + 0x00001800), + (0x000943c2 + 0x00000000), + (0x000943c2 + 0x00000800), + (0x000943c2 + 0x00001000), + (0x000943c2 + 0x00001800), + (0x000d43c2 + 0x00000000), + (0x000d43c2 + 0x00000800), + (0x000d43c2 + 0x00001000), + (0x000d43c2 + 0x00001800), + (0x001143c2 + 0x00000000), + (0x001143c2 + 0x00000800), + (0x001143c2 + 0x00001000), + (0x001143c2 + 0x00001800), + (0x001543c2 + 0x00000000), + (0x001543c2 + 0x00000800), + (0x001543c2 + 0x00001000), + (0x001543c2 + 0x00001800), + (0x001943c2 + 0x00000000), + (0x001943c2 + 0x00000800), + (0x001943c2 + 0x00001000), + (0x001943c2 + 0x00001800), + (0x001d43c2 + 0x00000000), + (0x001d43c2 + 0x00000800), + (0x001d43c2 + 0x00001000), + (0x001d43c2 + 0x00001800), }; +static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 bits, i, tmp, reg; + + bits = 0x7f; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_addrs[i]; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_mask_addrs[i]; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_addrs[i]; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_mask_addrs[i]; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + break; + default: + break; + } + + return 0; +} + static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -217,7 +263,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -228,7 +274,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, } break; case AMDGPU_IRQ_STATE_ENABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -244,78 +290,48 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } -/** - * vega10_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: 
amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry, - uint64_t addr) -{ - struct amdgpu_vm *vm; - u64 key; - int r; - - /* No PASID, can't identify faulting process */ - if (!entry->pasid) - return true; - - /* Not a retry fault */ - if (!(entry->src_data[1] & 0x80)) - return true; - - /* Track retry faults in per-VM fault FIFO. */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid); - if (!vm) { - /* VM not found, process it normally */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } - - key = AMDGPU_VM_FAULT(entry->pasid, addr); - r = amdgpu_vm_add_fault(vm->fault_hash, key); - - /* Hash table is full or the fault is already being processed, - * ignore further page faults - */ - if (r != 0) { - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - /* No locking required with single writer and single reader */ - r = kfifo_put(&vm->faults, key); - if (!r) { - /* FIFO is full. Ignore it until there is space */ - amdgpu_vm_clear_fault(vm->fault_hash, key); - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - - spin_unlock(&adev->vm_manager.pasid_lock); - /* It's the first fault for this address, process it normally */ - return true; -} - static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + struct amdgpu_vmhub *hub; bool retry_fault = !!(entry->src_data[1] & 0x80); uint32_t status = 0; u64 addr; + char hub_name[10]; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - if (!gmc_v9_0_prescreen_iv(adev, entry, addr)) + if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + entry->timestamp)) + return 1; /* This also prevents sending it to KFD */ + + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + snprintf(hub_name, sizeof(hub_name), "mmhub0"); + hub = &adev->vmhub[AMDGPU_MMHUB_0]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + snprintf(hub_name, sizeof(hub_name), "mmhub1"); + hub = &adev->vmhub[AMDGPU_MMHUB_1]; + } else { + snprintf(hub_name, sizeof(hub_name), "gfxhub0"); + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + } + + /* If it's the first fault for this address, process it normally */ + if (retry_fault && !in_interrupt() && + amdgpu_vm_handle_fault(adev, entry->pasid, addr)) return 1; /* This also prevents sending it to KFD */ if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } @@ -329,17 +345,33 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, "[%s] %s page fault (src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d)\n", - entry->vmid_src ? "mmhub" : "gfxhub", - retry_fault ? "retry" : "no-retry", + hub_name, retry_fault ? 
"retry" : "no-retry", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, RW)); + + } } return 0; @@ -350,10 +382,19 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { .process = gmc_v9_0_process_interrupt, }; + +static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { + .set = gmc_v9_0_ecc_interrupt_state, + .process = amdgpu_umc_process_ecc_irq, +}; + static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gmc.vm_fault.num_types = 1; adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; + + adev->gmc.ecc_irq.num_types = 1; + adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; } static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, @@ -391,44 +432,87 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, * * Flush the TLB for the requested page table using certain type. 
*/ -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { const unsigned eng = 17; - unsigned i, j; + u32 j, tmp; + struct amdgpu_vmhub *hub; - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + BUG_ON(vmhub >= adev->num_vmhubs); - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal - */ - if (adev->gfx.kiq.ring.sched.ready && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, - 1 << vmid); - continue; - } + hub = &adev->vmhub[vmhub]; + tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + + /* This is necessary for a HW workaround under SRIOV as well + * as GFXOFF under bare metal + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + uint32_t req = hub->vm_inv_eng0_req + eng; + uint32_t ack = hub->vm_inv_eng0_ack + eng; + + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + 1 << vmid); + return; + } + + spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ - spin_lock(&adev->gmc.invalidate_lock); - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - if (tmp & (1 << vmid)) + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) break; udelay(1); } - spin_unlock(&adev->gmc.invalidate_lock); - if (j < adev->usec_timeout) - continue; - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + if (tmp & (1 << vmid)) + break; + udelay(1); + } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + + if (j < adev->usec_timeout) + return; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -439,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -449,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } @@ -458,7 +565,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + /* Do nothing because there's no lut register for mmhub1. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_1) + return; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -466,103 +577,57 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, reg, pasid); } -/** - * gmc_v9_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags +/* + * PTE format on VEGA 10: + * 63:59 reserved + * 58:57 mtype + * 56 F + * 55 L + * 54 P + * 53 SW + * 52 T + * 50:48 reserved + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid * - * Update the page tables using the CPU. 
+ * PDE format on VEGA 10: + * 63:59 block fragment size + * 58:55 reserved + * 54 P + * 53:48 reserved + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid */ -static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - /* - * PTE format on VEGA 10: - * 63:59 reserved - * 58:57 mtype - * 56 F - * 55 L - * 54 P - * 53 SW - * 52 T - * 50:48 reserved - * 47:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 Z - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on VEGA 10: - * 63:59 block fragment size - * 58:55 reserved - * 54 P - * 53:48 reserved - * 47:6 physical base address of PD or PTE - * 5:3 reserved - * 2 C - * 1 system - * 0 valid - */ - - /* - * The following is for PTE only. GART does not have PDEs. - */ - value = addr & 0x0000FFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - return 0; -} - -static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) +static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) { - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - - switch (flags & AMDGPU_VM_MTYPE_MASK) { + switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); + case AMDGPU_VM_MTYPE_RW: + return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW); case AMDGPU_VM_MTYPE_CC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); default: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); } - - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; } static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -589,13 +654,34 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } +static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; + + if (mapping->flags & AMDGPU_PTE_PRT) { + *flags |= AMDGPU_PTE_PRT; + *flags &= ~AMDGPU_PTE_VALID; + } + + if (adev->asic_type == CHIP_ARCTURUS && + !(*flags & AMDGPU_PTE_SYSTEM) && + mapping->bo_va->is_xgmi) + *flags |= AMDGPU_PTE_SNOOPED; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .set_pte_pde = gmc_v9_0_set_pte_pde, - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, - .get_vm_pde = gmc_v9_0_get_vm_pde + .map_mtype = gmc_v9_0_map_mtype, + .get_vm_pde = gmc_v9_0_get_vm_pde, + 
.get_vm_pte = gmc_v9_0_get_vm_pte }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -603,12 +689,44 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } +static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->umc.funcs = &umc_v6_0_funcs; + break; + case CHIP_VEGA20: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; + default: + break; + } +} + +static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + adev->mmhub.funcs = &mmhub_v1_0_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + gmc_v9_0_set_umc_funcs(adev); + gmc_v9_0_set_mmhub_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -620,85 +738,6 @@ static int gmc_v9_0_early_init(void *handle) return 0; } -static int gmc_v9_0_ecc_available(struct amdgpu_device *adev) -{ - uint32_t reg_val; - uint32_t reg_addr; - uint32_t field_val; - size_t i; - uint32_t fv2; - size_t lost_sheep; - - DRM_DEBUG("ecc: gmc_v9_0_ecc_available()\n"); - - lost_sheep = 0; - for (i = 0; i < ARRAY_SIZE(ecc_umclocalcap_addrs); ++i) { - reg_addr = ecc_umclocalcap_addrs[i]; - DRM_DEBUG("ecc: " - "UMCCH_UmcLocalCap[%zu]: reg_addr: 0x%08x\n", - i, reg_addr); - reg_val = RREG32(reg_addr); - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UmcLocalCap, - EccDis); - DRM_DEBUG("ecc: " - "reg_val: 0x%08x, " - "EccDis: 0x%08x, ", - reg_val, field_val); - if (field_val) { - DRM_ERROR("ecc: UmcLocalCap:EccDis is set.\n"); - ++lost_sheep; - } - } - - for (i = 0; i < ARRAY_SIZE(ecc_umcch_umc_config_addrs); ++i) { - reg_addr = ecc_umcch_umc_config_addrs[i]; - DRM_DEBUG("ecc: " - "UMCCH0_0_UMC_CONFIG[%zu]: reg_addr: 0x%08x", - i, reg_addr); - reg_val = RREG32(reg_addr); - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UMC_CONFIG, - DramReady); - DRM_DEBUG("ecc: " - "reg_val: 0x%08x, " - "DramReady: 0x%08x\n", - reg_val, field_val); - - if (!field_val) { - DRM_ERROR("ecc: UMC_CONFIG:DramReady is not set.\n"); - ++lost_sheep; - } - } - - for (i = 0; i < ARRAY_SIZE(ecc_umcch_eccctrl_addrs); ++i) { - reg_addr = ecc_umcch_eccctrl_addrs[i]; - DRM_DEBUG("ecc: " - "UMCCH_EccCtrl[%zu]: reg_addr: 0x%08x, ", - i, reg_addr); - reg_val = RREG32(reg_addr); - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, - WrEccEn); - fv2 = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, - RdEccEn); - DRM_DEBUG("ecc: " - "reg_val: 0x%08x, " - "WrEccEn: 0x%08x, " - "RdEccEn: 0x%08x\n", - reg_val, field_val, fv2); - - if (!field_val) { - DRM_DEBUG("ecc: WrEccEn is not set\n"); - ++lost_sheep; - } - if (!fv2) { - DRM_DEBUG("ecc: RdEccEn is not set\n"); - ++lost_sheep; - } - } - - DRM_DEBUG("ecc: lost_sheep: %zu\n", lost_sheep); - return lost_sheep == 0; -} - static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) { @@ -713,8 +752,10 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) */ switch (adev->asic_type) { case CHIP_VEGA10: - return true; case CHIP_RAVEN: + case CHIP_ARCTURUS: 
+ case CHIP_RENOIR: + return true; case CHIP_VEGA12: case CHIP_VEGA20: default: @@ -726,7 +767,8 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) { struct amdgpu_ring *ring; unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = - {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, + GFXHUB_FREE_VM_INV_ENGS_BITMAP}; unsigned i; unsigned vmhub, inv_eng; @@ -762,20 +804,36 @@ static int gmc_v9_0_late_init(void *handle) r = gmc_v9_0_allocate_vm_inv_eng(adev); if (r) return r; + /* Check if ecc is available */ + if (!amdgpu_sriov_vf(adev)) { + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA20: + r = amdgpu_atomfirmware_mem_ecc_supported(adev); + if (!r) { + DRM_INFO("ECC is not present.\n"); + if (adev->df_funcs->enable_ecc_force_par_wr_rmw) + adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + } else { + DRM_INFO("ECC is active.\n"); + } - if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) { - r = gmc_v9_0_ecc_available(adev); - if (r == 1) { - DRM_INFO("ECC is active.\n"); - } else if (r == 0) { - DRM_INFO("ECC is not present.\n"); - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); - } else { - DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); - return r; + r = amdgpu_atomfirmware_sram_ecc_supported(adev); + if (!r) { + DRM_INFO("SRAM ECC is not present.\n"); + } else { + DRM_INFO("SRAM ECC is active.\n"); + } + break; + default: + break; } } + r = amdgpu_gmc_ras_late_init(adev); + if (r) + return r; + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); } @@ -783,14 +841,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { u64 base = 0; - if (!amdgpu_sriov_vf(adev)) + + if (adev->asic_type == CHIP_ARCTURUS) + base = mmhub_v9_4_get_fb_location(adev); + else if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); - if (!amdgpu_sriov_vf(adev)) - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -810,25 +871,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { - int chansize, numchan; int r; - if (amdgpu_emu_mode != 1) - adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - if (!adev->gmc.vram_width) { - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - - numchan = adev->df_funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; - } - /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { @@ -856,10 +903,12 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case CHIP_VEGA10: /* all engines support GPUVM */ case CHIP_VEGA12: /* all engines support GPUVM */ case CHIP_VEGA20: + case CHIP_ARCTURUS: default: adev->gmc.gart_size = 512ULL << 20; break; case CHIP_RAVEN: /* DCE SG support */ + case CHIP_RENOIR: adev->gmc.gart_size = 1024ULL << 20; break; } 
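The gmc_v9_0 hunks in this range rework the TLB-flush interface: gmc_v9_0_flush_gpu_tlb() now takes an explicit vmhub index and flushes a single hub (with the semaphore acquire/release workaround around the invalidate request), and callers are expected to iterate over adev->num_vmhubs themselves, as the reworked gmc_v9_0_hw_init() does further down. A minimal sketch of the new calling convention follows; the helper name is illustrative only and is not part of the patch:

/* Illustrative sketch (not part of the patch): flush VMID 0 on every
 * VM hub, one hub at a time, using the per-hub interface introduced
 * above. This mirrors the loop added to gmc_v9_0_hw_init().
 */
static void example_flush_all_vmhubs(struct amdgpu_device *adev)
{
	unsigned int hub;

	for (hub = 0; hub < adev->num_vmhubs; ++hub)
		gmc_v9_0_flush_gpu_tlb(adev, 0 /* vmid */, hub, 0 /* flush_type */);
}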
@@ -885,14 +934,14 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; - adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) | + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; return amdgpu_gart_table_vram_alloc(adev); } static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { - u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); + u32 d1vga_control; unsigned size; /* @@ -902,6 +951,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) if (gmc_v9_0_keep_stolen_memory(adev)) return 9 * 1024 * 1024; + d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ } else { @@ -909,6 +959,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); size = (REG_GET_FIELD(viewport, HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * @@ -936,18 +987,47 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v9_0_sw_init(void *handle) { - int r; - int dma_bits; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v1_0_init(adev); - mmhub_v1_0_init(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_init(adev); + else + mmhub_v1_0_init(adev); spin_lock_init(&adev->gmc.invalidate_lock); - adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (amdgpu_sriov_vf(adev)) + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + else if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = vram_width; + + if (!adev->gmc.vram_width) { + int chansize, numchan; + + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + + numchan = adev->df_funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_RAVEN: + adev->num_vmhubs = 2; + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { @@ -960,6 +1040,10 @@ static int gmc_v9_0_sw_init(void *handle) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_RENOIR: + adev->num_vmhubs = 2; + + /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -971,6 +1055,12 @@ static int gmc_v9_0_sw_init(void *handle) else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; + case CHIP_ARCTURUS: + adev->num_vmhubs = 3; + + /* Keep the vm size same with Vega20 */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; default: break; } @@ -981,37 +1071,37 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->asic_type == CHIP_ARCTURUS) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + } + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, 
&adev->gmc.vm_fault); if (r) return r; + /* interrupt sent to DF. */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, + &adev->gmc.ecc_irq); + if (r) + return r; + /* Set the internal MC address mask * This is the max address of the GPU's * internal address space. */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 44-bits. - * IGP - can handle 44-bits - * PCI - dma32 for legacy pci gart, 44 bits on vega10 - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 44; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); - } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(44); if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); @@ -1040,8 +1130,9 @@ static int gmc_v9_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -1051,12 +1142,14 @@ static int gmc_v9_0_sw_init(void *handle) static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + void *stolen_vga_buf; + amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); if (gmc_v9_0_keep_stolen_memory(adev)) - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); @@ -1070,6 +1163,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + if (amdgpu_sriov_vf(adev)) + break; + /* fall through */ case CHIP_VEGA20: soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, @@ -1081,6 +1177,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_VEGA12: break; case CHIP_RAVEN: + /* TODO for renoir */ soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, ARRAY_SIZE(golden_settings_athub_1_0_0)); @@ -1098,12 +1195,6 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { int r; - bool value; - u32 tmp; - - amdgpu_device_program_register_sequence(adev, - golden_settings_vega10_hdp, - ARRAY_SIZE(golden_settings_vega10_hdp)); if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -1113,39 +1204,17 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - switch (adev->asic_type) { - case CHIP_RAVEN: - mmhub_v1_0_update_power_gating(adev, true); - break; - default: - break; - } - r = gfxhub_v1_0_gart_enable(adev); if (r) return r; - r = mmhub_v1_0_gart_enable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + 
r = mmhub_v9_4_gart_enable(adev); + else + r = mmhub_v1_0_gart_enable(adev); if (r) return r; - WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); - - tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); - WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); - - /* After HDP is initialized, flush HDP.*/ - adev->nbio_funcs->hdp_flush(adev, NULL); - - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) - value = false; - else - value = true; - - gfxhub_v1_0_set_fault_enable_default(adev, value); - mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_flush_gpu_tlb(adev, 0, 0); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); @@ -1155,20 +1224,68 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) static int gmc_v9_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool value; + int r, i; + u32 tmp; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); if (adev->mode_info.num_crtc) { - /* Lockout access through VGA aperture*/ - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + if (adev->asic_type != CHIP_ARCTURUS) { + /* Lockout access through VGA aperture*/ + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - /* disable VGA render */ - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + /* disable VGA render */ + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + } } + amdgpu_device_program_register_sequence(adev, + golden_settings_vega10_hdp, + ARRAY_SIZE(golden_settings_vega10_hdp)); + + switch (adev->asic_type) { + case CHIP_RAVEN: + /* TODO for renoir */ + mmhub_v1_0_update_power_gating(adev, true); + break; + case CHIP_ARCTURUS: + WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); + break; + default: + break; + } + + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); + + tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); + WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); + + /* After HDP is initialized, flush HDP.*/ + adev->nbio.funcs->hdp_flush(adev, NULL); + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + gfxhub_v1_0_set_fault_enable_default(adev, value); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_fault_enable_default(adev, value); + else + mmhub_v1_0_set_fault_enable_default(adev, value); + + for (i = 0; i < adev->num_vmhubs; ++i) + gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); + + if (adev->umc.funcs && adev->umc.funcs->init_registers) + adev->umc.funcs->init_registers(adev); + r = gmc_v9_0_gart_enable(adev); return r; @@ -1184,7 +1301,10 @@ static int gmc_v9_0_hw_init(void *handle) static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { gfxhub_v1_0_gart_disable(adev); - mmhub_v1_0_gart_disable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_gart_disable(adev); + else + mmhub_v1_0_gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1198,6 +1318,7 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v9_0_gart_disable(adev); @@ -1248,14 +1369,26 @@ static int gmc_v9_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = 
(struct amdgpu_device *)handle; - return mmhub_v1_0_set_clockgating(adev, state); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_clockgating(adev, state); + else + mmhub_v1_0_set_clockgating(adev, state); + + athub_v1_0_set_clockgating(adev, state); + + return 0; } static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - mmhub_v1_0_get_clockgating(adev, flags); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_get_clockgating(adev, flags); + else + mmhub_v1_0_get_clockgating(adev, flags); + + athub_v1_0_get_clockgating(adev, flags); } static int gmc_v9_0_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 5c8deac65580..971c0840358f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -37,4 +37,11 @@ extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; +/* amdgpu_amdkfd*.c */ +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t value); +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t value); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index b1626e1d2f5d..a13dd9a51149 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 0c9a2c03504e..4b3faaccecb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -21,7 +21,6 @@ * */ -#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "cikd.h" @@ -2824,7 +2823,7 @@ static int kv_dpm_init(struct amdgpu_device *adev) pi->caps_tcp_ramping = true; } - if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) + if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c index b82e33c01571..2d9ab6b8be66 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c @@ -22,7 +22,6 @@ * Authors: Alex Deucher */ -#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "kv_dpm.h" diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c new file mode 100644 index 000000000000..29fab7984855 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -0,0 +1,366 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "soc15_common.h" +#include "nv.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/navi10_mes.bin"); + +static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, + struct mes_add_queue_input *input) +{ + return 0; +} + +static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input) +{ + return 0; +} + +static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input) +{ + return 0; +} + +static int mes_v10_1_resume_gang(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input) +{ + return 0; +} + +static const struct amdgpu_mes_funcs mes_v10_1_funcs = { + .add_hw_queue = mes_v10_1_add_hw_queue, + .remove_hw_queue = mes_v10_1_remove_hw_queue, + .suspend_gang = mes_v10_1_suspend_gang, + .resume_gang = mes_v10_1_resume_gang, +}; + +static int mes_v10_1_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + const struct mes_firmware_header_v1_0 *mes_hdr; + + switch (adev->asic_type) { + case CHIP_NAVI10: + chip_name = "navi10"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name); + err = request_firmware(&adev->mes.fw, fw_name, adev->dev); + if (err) + return err; + + err = amdgpu_ucode_validate(adev->mes.fw); + if (err) { + release_firmware(adev->mes.fw); + adev->mes.fw = NULL; + return err; + } + + mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data; + adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version); + adev->mes.ucode_fw_version = + le32_to_cpu(mes_hdr->mes_ucode_data_version); + adev->mes.uc_start_addr = + le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | + ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); + adev->mes.data_start_addr = + le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | + ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); + + return 0; +} + +static void mes_v10_1_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->mes.fw); + adev->mes.fw = NULL; +} + +static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev) +{ + int r; + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw->data; + + 
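	/*
	 * Editor's note (illustration, not part of the patch): the MES binary
	 * is a single blob whose leading struct mes_firmware_header_v1_0
	 * records the byte offset/size of the instruction and data images,
	 * plus their 64-bit load addresses split into lo/hi dwords, e.g.
	 *
	 *   u64 start = le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
	 *               ((u64)le32_to_cpu(mes_hdr->mes_uc_start_addr_hi) << 32);
	 *
	 * The code below points fw_data at the instruction image inside the
	 * blob, copies it into a CPU-mapped GTT buffer object obtained from
	 * amdgpu_bo_create_reserved(), and then unmaps and unreserves the BO
	 * once the copy is done; the data image is staged the same way in
	 * mes_v10_1_allocate_ucode_data_buffer().
	 */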
fw_data = (const __le32 *)(adev->mes.fw->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.ucode_fw_obj, + &adev->mes.ucode_fw_gpu_addr, + (void **)&adev->mes.ucode_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); + return r; + } + + memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.ucode_fw_obj); + amdgpu_bo_unreserve(adev->mes.ucode_fw_obj); + + return 0; +} + +static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev) +{ + int r; + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw->data; + + fw_data = (const __le32 *)(adev->mes.fw->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.data_fw_obj, + &adev->mes.data_fw_gpu_addr, + (void **)&adev->mes.data_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); + return r; + } + + memcpy(adev->mes.data_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.data_fw_obj); + amdgpu_bo_unreserve(adev->mes.data_fw_obj); + + return 0; +} + +static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->mes.data_fw_obj, + &adev->mes.data_fw_gpu_addr, + (void **)&adev->mes.data_fw_ptr); + + amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj, + &adev->mes.ucode_fw_gpu_addr, + (void **)&adev->mes.ucode_fw_ptr); +} + +static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable) +{ + uint32_t data = 0; + + if (enable) { + data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); + + /* set ucode start address */ + WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START, + (uint32_t)(adev->mes.uc_start_addr) >> 2); + + /* clear BYPASS_UNCACHED to avoid hangs after interrupt. 
*/ + data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL, + BYPASS_UNCACHED, 0); + WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data); + + /* unhalt MES and activate pipe0 */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); + } else { + data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, + MES_INVALIDATE_ICACHE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); + WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); + } +} + +/* This function is for backdoor MES firmware */ +static int mes_v10_1_load_microcode(struct amdgpu_device *adev) +{ + int r; + uint32_t data; + + if (!adev->mes.fw) + return -EINVAL; + + r = mes_v10_1_allocate_ucode_buffer(adev); + if (r) + return r; + + r = mes_v10_1_allocate_ucode_data_buffer(adev); + if (r) { + mes_v10_1_free_ucode_buffers(adev); + return r; + } + + mes_v10_1_enable(adev, false); + + WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0); + + mutex_lock(&adev->srbm_mutex); + /* me=3, pipe=0, queue=0 */ + nv_grbm_select(adev, 3, 0, 0, 0); + + /* set ucode start address */ + WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START, + (uint32_t)(adev->mes.uc_start_addr) >> 2); + + /* set ucode fimrware address */ + WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO, + lower_32_bits(adev->mes.ucode_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI, + upper_32_bits(adev->mes.ucode_fw_gpu_addr)); + + /* set ucode instruction cache boundary to 2M-1 */ + WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF); + + /* set ucode data firmware address */ + WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO, + lower_32_bits(adev->mes.data_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI, + upper_32_bits(adev->mes.data_fw_gpu_addr)); + + /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF); + + /* invalidate ICACHE */ + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + + /* prime the ICACHE. 
*/ + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v10_1_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = mes_v10_1_init_microcode(adev); + if (r) + return r; + + return 0; +} + +static int mes_v10_1_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mes_v10_1_free_microcode(adev); + + return 0; +} + +static int mes_v10_1_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = mes_v10_1_load_microcode(adev); + if (r) { + DRM_ERROR("failed to MES fw, r=%d\n", r); + return r; + } + } else { + DRM_ERROR("only support direct fw loading on MES\n"); + return -EINVAL; + } + + mes_v10_1_enable(adev, true); + + return 0; +} + +static int mes_v10_1_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mes_v10_1_enable(adev, false); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) + mes_v10_1_free_ucode_buffers(adev); + + return 0; +} + +static int mes_v10_1_suspend(void *handle) +{ + return 0; +} + +static int mes_v10_1_resume(void *handle) +{ + return 0; +} + +static const struct amd_ip_funcs mes_v10_1_ip_funcs = { + .name = "mes_v10_1", + .sw_init = mes_v10_1_sw_init, + .sw_fini = mes_v10_1_sw_fini, + .hw_init = mes_v10_1_hw_init, + .hw_fini = mes_v10_1_hw_fini, + .suspend = mes_v10_1_suspend, + .resume = mes_v10_1_resume, +}; + +const struct amdgpu_ip_block_version mes_v10_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_MES, + .major = 10, + .minor = 1, + .rev = 0, + .funcs = &mes_v10_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h new file mode 100644 index 000000000000..9afd6ddb01e9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __MES_V10_1_H__ +#define __MES_V10_1_H__ + +extern const struct amdgpu_ip_block_version mes_v10_1_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index d0d966d6080a..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -21,13 +21,13 @@ * */ #include "amdgpu.h" +#include "amdgpu_ras.h" #include "mmhub_v1_0.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "mmhub/mmhub_1_0_default.h" -#include "athub/athub_1_0_offset.h" -#include "athub/athub_1_0_sh_mask.h" +#include "mmhub/mmhub_9_4_0_offset.h" #include "vega10_enum.h" #include "soc15_common.h" @@ -35,6 +35,9 @@ #define mmDAGB0_CNTL_MISC2_RV 0x008f #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 +#define EA_EDC_CNT_MASK 0x3 +#define EA_EDC_CNT_SHIFT 0x2 + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); @@ -111,6 +114,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + if (amdgpu_sriov_vf(adev)) + return; + /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; @@ -156,6 +162,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; + if (amdgpu_sriov_vf(adev)) + return; + /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -163,7 +172,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); @@ -182,6 +191,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); } + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); @@ -196,11 +206,16 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); } static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, @@ -255,7 +270,8 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); @@ -337,11 +353,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - /* Setup L2 cache */ - tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + if (!amdgpu_sriov_vf(adev)) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + } } /** @@ -353,6 +371,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + + if (amdgpu_sriov_vf(adev)) + return; + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -390,7 +412,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, @@ -398,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = @@ -474,22 +498,6 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); } -static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) - data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; - else - data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; - - if (def != data) - WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} - static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { @@ -506,23 +514,6 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); } -static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && - (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) - data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - else - data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - - if(def != data) - WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} - int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { @@ -534,14 +525,11 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case 
CHIP_RENOIR: mmhub_v1_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - athub_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); mmhub_v1_0_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); - athub_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); break; default: break; @@ -552,18 +540,86 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) { - int data; + int data, data1; if (amdgpu_sriov_vf(adev)) *flags = 0; + data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + /* AMD_CG_SUPPORT_MC_MGCG */ - data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + if ((data & ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) *flags |= AMD_CG_SUPPORT_MC_MGCG; /* AMD_CG_SUPPORT_MC_LS */ - data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_LS; } + +static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + int i; + uint32_t ea0_edc_cnt, ea0_edc_cnt2; + uint32_t ea1_edc_cnt, ea1_edc_cnt2; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + /* EDC CNT will be cleared automatically after read */ + ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20); + ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20); + ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20); + ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20); + + /* error count of each error type is recorded by 2 bits, + * ce and ue count in EDC_CNT + */ + for (i = 0; i < 5; i++) { + err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + } + /* successive ue count in EDC_CNT */ + for (i = 0; i < 5; i++) { + err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + } + + /* ce and ue count in EDC_CNT2 */ + for (i = 0; i < 3; i++) { + err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + } + /* successive ue count in EDC_CNT2 */ + for (i = 0; i < 6; i++) { + err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + } +} + +const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { + .ras_late_init = amdgpu_mmhub_ras_late_init, + 
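	/*
	 * Editor's note (annotation, not part of the patch): this function
	 * table is how the generic gmc/RAS code reaches the MMHUB block;
	 * ->ras_late_init registers the block with the amdgpu RAS core, and
	 * ->query_ras_error_count (defined above) folds the 2-bit-per-field
	 * MMEA EDC counters into the ce/ue totals of a struct ras_err_data.
	 */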
.query_ras_error_count = mmhub_v1_0_query_ras_error_count, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index 0de0fdf98c00..c43319e8f945 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -23,6 +23,8 @@ #ifndef __MMHUB_V1_0_H__ #define __MMHUB_V1_0_H__ +extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev); int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c new file mode 100644 index 000000000000..a7cb185d639a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -0,0 +1,465 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "mmhub_v2_0.h" + +#include "mmhub/mmhub_2_0_0_offset.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "mmhub/mmhub_2_0_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ + int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); +} + +static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* Disable AGP. 
*/ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); + + tmp = mmMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); + + tmp = mmMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, 
VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp); +} + +static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * MMMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. */ + mmhub_v2_0_init_gart_aperture_regs(adev); + mmhub_v2_0_init_system_aperture_regs(adev); + mmhub_v2_0_init_tlb_regs(adev); + mmhub_v2_0_init_cache_regs(adev); + + mmhub_v2_0_enable_system_domain(adev); + mmhub_v2_0_disable_identity_aperture(adev); + mmhub_v2_0_setup_vmid_config(adev); + mmhub_v2_0_program_invalidation(adev); + + return 0; +} + +void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v2_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 
value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +void mmhub_v2_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + +} + +static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + } else { + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); +} + +static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); +} + +int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + mmhub_v2_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v2_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + break; + default: + break; + } + + return 0; +} + +void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data, data1; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h new file mode 100644 index 000000000000..3ea4344f0315 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V2_0_H__ +#define __MMHUB_V2_0_H__ + +int mmhub_v2_0_gart_enable(struct amdgpu_device *adev); +void mmhub_v2_0_gart_disable(struct amdgpu_device *adev); +void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void mmhub_v2_0_init(struct amdgpu_device *adev); +int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c new file mode 100644 index 000000000000..66efe2f7bd76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -0,0 +1,657 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "mmhub_v9_4.h" + +#include "mmhub/mmhub_9_4_1_offset.h" +#include "mmhub/mmhub_9_4_1_sh_mask.h" +#include "mmhub/mmhub_9_4_1_default.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +#define MMHUB_NUM_INSTANCES 2 +#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000 + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) +{ + /* The base should be same b/t 2 mmhubs on Acrturus. Read one here. */ + u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE); + u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP); + + base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; + top <<= 24; + + adev->gmc.fb_start = base; + adev->gmc.fb_end = top; + + return base; +} + +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value) +{ + /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to + * mmVML2VC0_VM_CONTEXT1_* + */ + int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + lower_32_bits(value)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + upper_32_bits(value)); + +} + +static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void 
mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_end >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_start >> 24); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ATC_EN, 1); + + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_L2_CACHE, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, + int hubid) +{ + uint32_t tmp; + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, + int hubid) +{ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + + WREG32_SOC15_OFFSET(MMHUB, 0, + 
mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +} + +static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + int i; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, + tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, + int hubid) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0x1f); + } +} + +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + if (amdgpu_sriov_vf(adev)) { + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase + * they are VF copy registers so vbios post doesn't + * program them, for SRIOV driver need to program them + */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE, + i * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.vram_start >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP, + i * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. 
*/ + mmhub_v9_4_init_gart_aperture_regs(adev, i); + mmhub_v9_4_init_system_aperture_regs(adev, i); + mmhub_v9_4_init_tlb_regs(adev, i); + mmhub_v9_4_init_cache_regs(adev, i); + + mmhub_v9_4_enable_system_domain(adev, i); + mmhub_v9_4_disable_identity_aperture(adev, i); + mmhub_v9_4_setup_vmid_config(adev, i); + mmhub_v9_4_program_invalidation(adev, i); + } + + return 0; +} + +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, j; + + for (j = 0; j < MMHUB_NUM_INSTANCES; j++) { + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET + + i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + j * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + } +} + +/** + * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + if (!value) { + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + i * 
MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + } +} + +void mmhub_v9_4_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = + {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + hub[i]->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ACK) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + } +} + +static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + int i, j; + int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + + for (j = 0; j < 5; j++) { + def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist); + if (enable && + (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data1 &= + ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } else { + data1 |= + (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def1 != data1) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist, data1); + + if (i == 1 && j == 3) + break; + } + } +} + +static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, 
mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + } +} + +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + mmhub_v9_4_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v9_4_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data, data1; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_MC_MGCG */ + data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + + if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h new file mode 100644 index 000000000000..d435cfcec1a8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -0,0 +1,36 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __MMHUB_V9_4_H__ +#define __MMHUB_V9_4_H__ + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void mmhub_v9_4_init(struct amdgpu_device *adev); +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 73851ebb3833..cc5bf595f9b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -26,6 +26,7 @@ #include "nbio/nbio_6_1_sh_mask.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" +#include "mp/mp_9_0_offset.h" #include "soc15.h" #include "vega10_ih.h" #include "soc15_common.h" @@ -157,6 +158,82 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_ai_mailbox_set_valid(adev, false); } +static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf) +{ + int r = 0; + u32 req, val, size; + + if (!amdgim_is_hwperf(adev) || buf == NULL) + return -EBADRQC; + + switch(type) { + case PP_SCLK: + req = IDH_IRQ_GET_PP_SCLK; + break; + case PP_MCLK: + req = IDH_IRQ_GET_PP_MCLK; + break; + default: + return -EBADRQC; + } + + mutex_lock(&adev->virt.dpm_mutex); + + xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); + + r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); + if (!r && adev->fw_vram_usage.va != NULL) { + val = RREG32_NO_KIQ( + SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1)); + size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) + + val), PAGE_SIZE); + + if (size < PAGE_SIZE) + strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val)); + else + size = 0; + + r = size; + goto out; + } + + r = xgpu_ai_poll_msg(adev, IDH_FAIL); + if(r) + pr_info("%s DPM request failed", + (type == PP_SCLK)? "SCLK" : "MCLK"); + +out: + mutex_unlock(&adev->virt.dpm_mutex); + return r; +} + +static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level) +{ + int r = 0; + u32 req = IDH_IRQ_FORCE_DPM_LEVEL; + + if (!amdgim_is_hwperf(adev)) + return -EBADRQC; + + mutex_lock(&adev->virt.dpm_mutex); + xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0); + + r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); + if (!r) + goto out; + + r = xgpu_ai_poll_msg(adev, IDH_FAIL); + if (!r) + pr_info("DPM request failed"); + else + pr_info("Mailbox is broken"); + +out: + mutex_unlock(&adev->virt.dpm_mutex); + return r; +} + static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, enum idh_request req) { @@ -267,7 +344,7 @@ flr_done: /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) + && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT) amdgpu_device_gpu_recover(adev, NULL); } @@ -296,6 +373,9 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, if (amdgpu_sriov_runtime(adev)) schedule_work(&adev->virt.flr_work); break; + case IDH_QUERY_ALIVE: + xgpu_ai_mailbox_send_ack(adev); + break; /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore * it byfar since that polling thread will handle it, * other msg like flr complete is not handled here. 
@@ -375,4 +455,6 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .reset_gpu = xgpu_ai_request_reset, .wait_reset = NULL, .trans_msg = xgpu_ai_mailbox_trans_msg, + .get_pp_clk = xgpu_ai_get_pp_clk, + .force_dpm_level = xgpu_ai_force_dpm_level, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index b4a9ceea334b..077e91a33d62 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -35,6 +35,10 @@ enum idh_request { IDH_REL_GPU_FINI_ACCESS, IDH_REQ_GPU_RESET_ACCESS, + IDH_IRQ_FORCE_DPM_LEVEL = 10, + IDH_IRQ_GET_PP_SCLK, + IDH_IRQ_GET_PP_MCLK, + IDH_LOG_VF_ERROR = 200, }; @@ -43,6 +47,9 @@ enum idh_event { IDH_READY_TO_ACCESS_GPU, IDH_FLR_NOTIFICATION, IDH_FLR_NOTIFICATION_CMPL, + IDH_SUCCESS, + IDH_FAIL, + IDH_QUERY_ALIVE, IDH_EVENT_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c new file mode 100644 index 000000000000..0d8767eb7a70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -0,0 +1,380 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "nbio/nbio_2_3_offset.h" +#include "nbio/nbio_2_3_sh_mask.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "soc15.h" +#include "navi10_ih.h" +#include "soc15_common.h" +#include "mxgpu_nv.h" +#include "mxgpu_ai.h" + +static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev) +{ + WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); +} + +static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val) +{ + WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0); +} + +/* + * this peek_msg could *only* be called in IRQ routine because in IRQ routine + * RCV_MSG_VALID field of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1 + * by host. + * + * if not called in IRQ routine, this peek_msg is not guaranteed to return the + * correct value since it doesn't return the RCV_DW0 under the case that + * RCV_MSG_VALID is set by host. 
+ */ +static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev) +{ + return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0)); +} + + +static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, + enum idh_event event) +{ + u32 reg; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0)); + if (reg != event) + return -ENOENT; + + xgpu_nv_mailbox_send_ack(adev); + + return 0; +} + +static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev) +{ + return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2; +} + +static int xgpu_nv_poll_ack(struct amdgpu_device *adev) +{ + int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT; + u8 reg; + + do { + reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE); + if (reg & 2) + return 0; + + mdelay(5); + timeout -= 5; + } while (timeout > 1); + + pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT); + + return -ETIME; +} + +static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) +{ + int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT; + + do { + r = xgpu_nv_mailbox_rcv_msg(adev, event); + if (!r) + return 0; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + + pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + + return -ETIME; +} + +static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) +{ + u32 reg; + int r; + uint8_t trn; + + /* IMPORTANT: + * clear TRN_MSG_VALID to clear host's RCV_MSG_ACK; with host's + * RCV_MSG_ACK cleared, hw automatically clears the VF's TRN_MSG_ACK, + * otherwise the xgpu_nv_poll_ack() below will return immediately + */ + do { + xgpu_nv_mailbox_set_valid(adev, false); + trn = xgpu_nv_peek_ack(adev); + if (trn) { + pr_err("trn=%x ACK should not assert! 
wait again !\n", trn); + msleep(1); + } + } while (trn); + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0)); + reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0, + MSGBUF_DATA, req); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0), + reg); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1), + data1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2), + data2); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3), + data3); + + xgpu_nv_mailbox_set_valid(adev, true); + + /* start to poll ack */ + r = xgpu_nv_poll_ack(adev); + if (r) + pr_err("Doesn't get ack from pf, continue\n"); + + xgpu_nv_mailbox_set_valid(adev, false); +} + +static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + int r; + + xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); + + /* start to check msg if request is idh_req_gpu_init_access */ + if (req == IDH_REQ_GPU_INIT_ACCESS || + req == IDH_REQ_GPU_FINI_ACCESS || + req == IDH_REQ_GPU_RESET_ACCESS) { + r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); + if (r) { + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); + return r; + } + /* Retrieve checksum from mailbox2 */ + if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) { + adev->virt.fw_reserve.checksum_key = + RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2)); + } + } + + return 0; +} + +static int xgpu_nv_request_reset(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +} + +static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + + req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS; + return xgpu_nv_send_access_requests(adev, req); +} + +static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + int r = 0; + + req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS; + r = xgpu_nv_send_access_requests(adev, req); + + return r; +} + +static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("get ack intr and do nothing.\n"); + return 0; +} + +static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; + int locked; + + /* block amdgpu_gpu_recover till msg FLR COMPLETE received, + * otherwise the mailbox msg will be ruined/reseted by + * the VF FLR. + * + * we can unlock the lock_reset to allow "amdgpu_job_timedout" + * to run gpu_recover() after FLR_NOTIFICATION_CMPL received + * which means host side had finished this VF's FLR. 
+ */ + locked = mutex_trylock(&adev->lock_reset); + if (locked) + adev->in_gpu_reset = 1; + + do { + if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) + goto flr_done; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + +flr_done: + if (locked) { + adev->in_gpu_reset = 0; + mutex_unlock(&adev->lock_reset); + } + + /* Trigger recovery for world switch failure if no TDR */ + if (amdgpu_device_should_recover_gpu(adev)) + amdgpu_device_gpu_recover(adev, NULL); +} + +static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + enum idh_event event = xgpu_nv_mailbox_peek_msg(adev); + + switch (event) { + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.flr_work); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it byfar since that polling thread will handle it, + * other msg like flr complete is not handled here. + */ + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = { + .set = xgpu_nv_set_mailbox_ack_irq, + .process = xgpu_nv_mailbox_ack_irq, +}; + +static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = { + .set = xgpu_nv_set_mailbox_rcv_irq, + .process = xgpu_nv_mailbox_rcv_irq, +}; + +void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->virt.ack_irq.num_types = 1; + adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs; + adev->virt.rcv_irq.num_types = 1; + adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs; +} + +int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); + if (r) + return r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + return 0; +} + +int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); + + return 0; +} + +void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev) +{ + amdgpu_irq_put(adev, &adev->virt.ack_irq, 0); + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); +} + +const struct amdgpu_virt_ops xgpu_nv_virt_ops = { + .req_full_gpu = xgpu_nv_request_full_gpu_access, + .rel_full_gpu = xgpu_nv_release_full_gpu_access, + .reset_gpu = xgpu_nv_request_reset, + .wait_reset = NULL, + .trans_msg = xgpu_nv_mailbox_trans_msg, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h new file mode 100644 index 000000000000..99b15f6865cb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -0,0 +1,41 @@ +/* + * Copyright 2014 Advanced Micro 
Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MXGPU_NV_H__ +#define __MXGPU_NV_H__ + +#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 12000 +#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500 + +extern const struct amdgpu_virt_ops xgpu_nv_virt_ops; + +void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev); +int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev); +int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev); +void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev); + +#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4) +#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 6a0fcd67662a..aef9d059ae52 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -515,7 +515,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) /* wait until RCV_MSG become 3 */ if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { - pr_err("failed to recieve FLR_CMPL\n"); + pr_err("failed to receive FLR_CMPL\n"); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c new file mode 100644 index 000000000000..9af73567e716 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -0,0 +1,487 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ih.h" + +#include "oss/osssys_5_0_0_offset.h" +#include "oss/osssys_5_0_0_sh_mask.h" + +#include "soc15_common.h" +#include "navi10_ih.h" + + +static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * navi10_ih_enable_interrupts - Enable the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * + * Enable the interrupt ring buffer (NAVI10). + */ +static void navi10_ih_enable_interrupts(struct amdgpu_device *adev) +{ + u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + adev->irq.ih.enabled = true; +} + +/** + * navi10_ih_disable_interrupts - Disable the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * + * Disable the interrupt ring buffer (NAVI10). + */ +static void navi10_ih_disable_interrupts(struct amdgpu_device *adev) +{ + u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); + adev->irq.ih.enabled = false; + adev->irq.ih.rptr = 0; +} + +static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 1 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +/** + * navi10_ih_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it (NAVI). + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int navi10_ih_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih = &adev->irq.ih; + int ret = 0; + u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken; + u32 tmp; + + /* disable irqs */ + navi10_ih_disable_interrupts(adev); + + adev->nbio.funcs->ih_control(adev); + + /* Ring Buffer base. 
[39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); + + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, + !!adev->irq.msi_enabled); + + if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) { + if (ih->use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + } + } + + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + + /* set the writeback address whether it's enabled or not */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, + lower_32_bits(ih->wptr_addr)); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, + upper_32_bits(ih->wptr_addr) & 0xFFFF); + + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); + + ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR); + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, ENABLE, 0); + } + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); + + adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell, + ih->doorbell_index); + + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + navi10_ih_enable_interrupts(adev); + + return ret; +} + +/** + * navi10_ih_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw (NAVI10). + */ +static void navi10_ih_irq_disable(struct amdgpu_device *adev) +{ + navi10_ih_disable_interrupts(adev); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * navi10_ih_get_wptr - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer (NAVI10). Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, reg, tmp; + + wptr = le32_to_cpu(*ih->wptr_cpu); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); + wptr = RREG32_NO_KIQ(reg); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catch up. 
+ */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); + tmp = RREG32_NO_KIQ(reg); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(reg, tmp); +out: + return (wptr & ih->ptr_mask); +} + +/** + * navi10_ih_decode_iv - decode an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Decodes the interrupt vector at the current rptr + * position and also advance the position. + */ +static void navi10_ih_decode_iv(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) +{ + /* wptr/rptr are in bytes! */ + u32 ring_index = ih->rptr >> 2; + uint32_t dw[8]; + + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); + dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); + dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); + dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); + dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); + + entry->client_id = dw[0] & 0xff; + entry->src_id = (dw[0] >> 8) & 0xff; + entry->ring_id = (dw[0] >> 16) & 0xff; + entry->vmid = (dw[0] >> 24) & 0xf; + entry->vmid_src = (dw[0] >> 31); + entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); + entry->timestamp_src = dw[2] >> 31; + entry->pasid = dw[3] & 0xffff; + entry->pasid_src = dw[3] >> 31; + entry->src_data[0] = dw[4]; + entry->src_data[1] = dw[5]; + entry->src_data[2] = dw[6]; + entry->src_data[3] = dw[7]; + + /* wptr/rptr are in bytes! */ + ih->rptr += 32; +} + +/** + * navi10_ih_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * + * Set the IH ring buffer rptr. + */ +static void navi10_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + } else + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); +} + +static int navi10_ih_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + navi10_ih_set_interrupt_funcs(adev); + return 0; +} + +static int navi10_ih_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr; + + /* use gpu virtual address for ih ring + * until ih_checken is programmed to allow + * use bus address for ih ring by psp bl */ + use_bus_addr = + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? 
false : true; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int navi10_ih_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); + + return 0; +} + +static int navi10_ih_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = navi10_ih_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int navi10_ih_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + navi10_ih_irq_disable(adev); + + return 0; +} + +static int navi10_ih_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return navi10_ih_hw_fini(adev); +} + +static int navi10_ih_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return navi10_ih_hw_init(adev); +} + +static bool navi10_ih_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int navi10_ih_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int navi10_ih_soft_reset(void *handle) +{ + /* todo */ + return 0; +} + +static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); + } + + return; +} + +static int navi10_ih_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + navi10_ih_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + return 0; +} + +static int navi10_ih_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static void navi10_ih_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL)) + *flags |= AMD_CG_SUPPORT_IH_CG; + + return; +} + +static const struct amd_ip_funcs navi10_ih_ip_funcs = { + .name = "navi10_ih", + .early_init = navi10_ih_early_init, + .late_init = NULL, + .sw_init = navi10_ih_sw_init, + .sw_fini = navi10_ih_sw_fini, + .hw_init = navi10_ih_hw_init, + .hw_fini = navi10_ih_hw_fini, + .suspend = navi10_ih_suspend, + .resume = navi10_ih_resume, + .is_idle = navi10_ih_is_idle, + .wait_for_idle = navi10_ih_wait_for_idle, + .soft_reset = navi10_ih_soft_reset, + .set_clockgating_state = navi10_ih_set_clockgating_state, + .set_powergating_state = navi10_ih_set_powergating_state, + .get_clockgating_state = navi10_ih_get_clockgating_state, +}; + +static const struct amdgpu_ih_funcs navi10_ih_funcs = { + .get_wptr = navi10_ih_get_wptr, + .decode_iv = navi10_ih_decode_iv, + .set_rptr = navi10_ih_set_rptr +}; + +static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev) +{ + if (adev->irq.ih_funcs == NULL) + adev->irq.ih_funcs = &navi10_ih_funcs; +} + +const struct amdgpu_ip_block_version navi10_ih_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &navi10_ih_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.h b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h new file mode 100644 index 000000000000..140fbdaaed17 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NAVI10_IH_H__ +#define __NAVI10_IH_H__ + +extern const struct amdgpu_ip_block_version navi10_ih_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c new file mode 100644 index 000000000000..88efaecf9f70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -0,0 +1,55 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "navi10_ip_offset.h" + +int navi10_reg_base_init(struct amdgpu_device *adev) +{ + int i; + + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h new file mode 100644 index 000000000000..074a9a09c0a7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h @@ -0,0 +1,4806 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NAVI10_SDMA_PKT_OPEN_H_ +#define __NAVI10_SDMA_PKT_OPEN_H_ + +#define SDMA_OP_NOP 0 +#define SDMA_OP_COPY 1 +#define SDMA_OP_WRITE 2 +#define SDMA_OP_INDIRECT 4 +#define SDMA_OP_FENCE 5 +#define SDMA_OP_TRAP 6 +#define SDMA_OP_SEM 7 +#define SDMA_OP_POLL_REGMEM 8 +#define SDMA_OP_COND_EXE 9 +#define SDMA_OP_ATOMIC 10 +#define SDMA_OP_CONST_FILL 11 +#define SDMA_OP_PTEPDE 12 +#define SDMA_OP_TIMESTAMP 13 +#define SDMA_OP_SRBM_WRITE 14 +#define SDMA_OP_PRE_EXE 15 +#define SDMA_OP_GPUVM_INV 16 +#define SDMA_OP_GCR_REQ 17 +#define SDMA_OP_DUMMY_TRAP 32 +#define SDMA_SUBOP_TIMESTAMP_SET 0 +#define SDMA_SUBOP_TIMESTAMP_GET 1 +#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2 +#define SDMA_SUBOP_COPY_LINEAR 0 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4 +#define SDMA_SUBOP_COPY_TILED 1 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6 +#define SDMA_SUBOP_COPY_SOA 3 +#define SDMA_SUBOP_COPY_DIRTY_PAGE 7 +#define SDMA_SUBOP_COPY_LINEAR_PHY 8 +#define SDMA_SUBOP_COPY_LINEAR_BC 16 +#define SDMA_SUBOP_COPY_TILED_BC 17 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22 +#define SDMA_SUBOP_WRITE_LINEAR 0 +#define SDMA_SUBOP_WRITE_TILED 1 +#define SDMA_SUBOP_WRITE_TILED_BC 17 +#define SDMA_SUBOP_PTEPDE_GEN 0 +#define SDMA_SUBOP_PTEPDE_COPY 1 +#define SDMA_SUBOP_PTEPDE_RMW 2 +#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3 +#define SDMA_SUBOP_DATA_FILL_MULTI 1 +#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1 +#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2 +#define SDMA_SUBOP_POLL_MEM_VERIFY 3 +#define HEADER_AGENT_DISPATCH 4 +#define HEADER_BARRIER 5 +#define SDMA_OP_AQL_COPY 0 +#define SDMA_OP_AQL_BARRIER_OR 0 + +/*define for op field*/ +#define SDMA_PKT_HEADER_op_offset 0 +#define SDMA_PKT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_HEADER_op_shift 0 +#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_HEADER_sub_op_offset 0 +#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_HEADER_sub_op_shift 8 +#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift) + +/* +** Definitions for SDMA_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0 +#define 
SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift) + +/*define for backwards field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25 +#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 
0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift) + +/*define for dst_ha field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 22 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift) + +/*define for src_ha field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 30 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define 
SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift) + +/*define for all field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for 
dst_mtype field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift) + +/*define for dst_l2_policy field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift) + +/*define for src_mtype field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift) + +/*define for src_l2_policy field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << 
SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for 
SDMA_PKT_COPY_PHYSICAL_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_mtype field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift) + +/*define for dst_l2_policy field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift) + +/*define for src_mtype field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift) + +/*define for src_l2_policy field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2 +#define 
SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift) + +/*define for dst_log field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & 
SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0 
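The field helpers in this header all follow the same mask-then-shift pattern, so a packet dword is assembled by OR-ing one helper per field. Below is a minimal illustrative sketch (not part of this patch) of how the SDMA_PKT_COPY_LINEAR words defined earlier might be filled; the ib[] buffer, byte_count, src and dst parameters, the kernel-style u32/u64 types, and the lower_32_bits()/upper_32_bits() helpers are assumptions made for the example, and the HEADER word is omitted because its op/sub_op values are defined elsewhere in the header.

/* Illustrative only: fill the body of a COPY_LINEAR packet using the
 * field macros above; dword indices follow the *_offset values. */
static void example_fill_copy_linear(u32 *ib, u32 byte_count, u64 src, u64 dst)
{
	/* COUNT word (dword 1): 22-bit transfer count */
	ib[1] = SDMA_PKT_COPY_LINEAR_COUNT_COUNT(byte_count);
	/* PARAMETER word (dword 2): src/dst swap settings */
	ib[2] = SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(0) |
		SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(0);
	/* Source and destination addresses split across dwords 3..6 */
	ib[3] = SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(lower_32_bits(src));
	ib[4] = SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(upper_32_bits(src));
	ib[5] = SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(lower_32_bits(dst));
	ib[6] = SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(upper_32_bits(dst));
}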
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift) + +/*define for dst1_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define 
SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST1_ADDR_LO word*/ +/*define for dst1_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift) + +/*define for DST1_ADDR_HI word*/ +/*define for dst1_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift) + +/*define for DST2_ADDR_LO word*/ +/*define for dst2_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift) + +/*define for DST2_ADDR_HI word*/ +/*define for dst2_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & 
SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5 +#define 
SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0 +#define 
SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ 
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for 
DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16 +#define 
SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift) + +/*define for dst_ha field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 22 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift) + +/*define for src_ha field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 30 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ 
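For the sub-window copy defined above, several small fields share a dword (x/y in DW_3, z and pitch in DW_4, the slice pitch in DW_5), so each word is again built by OR-ing the per-field helpers. A hedged sketch follows; the ib[] buffer and all coordinate parameters are hypothetical, and the exact encoding of the extents (zero- or one-based) is not specified by this excerpt.

/* Illustrative only: pack the source window origin of a
 * SDMA_PKT_COPY_LINEAR_SUBWIN packet from the macros defined above. */
static void example_fill_subwin_src(u32 *ib, u32 src_x, u32 src_y, u32 src_z,
				    u32 src_pitch, u32 src_slice_pitch)
{
	ib[3] = SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(src_x) |
		SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(src_y);
	ib[4] = SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(src_z) |
		SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(src_pitch);
	ib[5] = SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(src_slice_pitch);
}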
+/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16 +#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7 +#define 
SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift) + +/*define for linear_cc field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift 20 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CC(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12 +#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0 +#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift) + + +/* +** Definitions for 
SDMA_PKT_COPY_TILED_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift) + +/*define for array_mode 
field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3 +#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8 +#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15 +#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18 +#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21 +#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24 +#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26 +#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define 
SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 11 +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2 +#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & 
SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift) + +/*define for videocopy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift) + +/*define for TILED_ADDR_LO_0 word*/ +/*define for tiled_addr0_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift) + +/*define for TILED_ADDR_HI_0 word*/ +/*define for tiled_addr0_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift) + +/*define for TILED_ADDR_LO_1 word*/ +/*define for tiled_addr1_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift) + +/*define for TILED_ADDR_HI_1 word*/ +/*define for tiled_addr1_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF +#define 
SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift) + +/*define for DW_5 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift) + +/*define for DW_6 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift) + +/*define for DW_7 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift) + +/*define for DW_8 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF +#define 
SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift) + +/*define for DW_9 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift) + +/*define for DW_10 word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF 
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift) + +/*define for dcc field*/ +#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19 +#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift) + +/*define for dcc_dir field*/ +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31 +#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift) + +/*define for src_y 
field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift) + +/*define for src_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift) + +/*define for src_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift) + +/*define for src_mip_max field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift) + +/*define for src_mip_id field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define 
SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift) + +/*define for dst_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << 
SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift) + +/*define for dst_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift) + +/*define for dst_mip_max field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16 +#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift) + +/*define for dst_mip_id field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20 +#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift) + +/*define for META_ADDR_LO word*/ +/*define for meta_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15 +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift) + +/*define for META_ADDR_HI word*/ +/*define for meta_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16 +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & 
SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift) + +/*define for META_CONFIG word*/ +/*define for data_format field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0 +#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift) + +/*define for color_transform_disable field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7 +#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift) + +/*define for alpha_is_on_msb field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8 +#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift) + +/*define for number_type field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9 +#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift) + +/*define for surface_type field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12 +#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift) + +/*define for max_comp_block_size field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24 +#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift) + +/*define for max_uncomp_block_size field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26 +#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift) + +/*define for write_compress_enable field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28 +#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift) + +/*define 
for meta_tmz field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29 +#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF +#define 
SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift) + +/*define for src_array_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift) + +/*define for src_mit_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift) + +/*define for src_tilesplit_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift) + +/*define for src_bank_w field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift) + +/*define for src_bank_h field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift) + +/*define for src_num_bank field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift) + +/*define for src_mat_aspt field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << 
SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift) + +/*define for src_pipe_config field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define 
SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift) + +/*define for dst_array_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift) + +/*define for dst_mit_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift) + +/*define for dst_tilesplit_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift) + +/*define for dst_bank_w field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift) + +/*define for dst_bank_h field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift) + +/*define for dst_num_bank field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift) + +/*define for dst_mat_aspt field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift) + +/*define for dst_pipe_config field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13 +#define 
SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift) + +/*define for dcc field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for 
tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) 
(((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift) + +/*define for mip_id field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define 
SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift) + +/*define for META_ADDR_LO word*/ +/*define for meta_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & 
SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift) + +/*define for META_ADDR_HI word*/ +/*define for meta_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift) + +/*define for META_CONFIG word*/ +/*define for data_format field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift) + +/*define for color_transform_disable field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift) + +/*define for alpha_is_on_msb field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift) + +/*define for number_type field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift) + +/*define for surface_type field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift) + +/*define for max_comp_block_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift) + +/*define for max_uncomp_block_size field*/ +#define 
SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift) + +/*define for write_compress_enable field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift) + +/*define for meta_tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) 
(((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007 +#define 
SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) ((x & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define 
SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << 
SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_STRUCT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift) + +/*define for SB_ADDR_LO word*/ +/*define for sb_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift) + +/*define for SB_ADDR_HI word*/ +/*define for sb_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift) + +/*define for START_INDEX word*/ +/*define for start_index field*/ +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift) + 
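(Editor's aside on how these generated definitions are consumed: every field comes as an _offset constant giving the dword index of its word inside the packet, a _mask, a _shift, and an upper-case packing macro of the form (((x) & mask) << shift). The sketch below is a minimal, hypothetical example of filling the SDMA_PKT_COPY_STRUCT header and structure-buffer address words with the macros defined just above; the ib buffer and the op/sub_op values are stand-ins passed in by the caller, since the opcode enums live elsewhere in this header and are not part of this hunk.)

#include <stdint.h>

/* Hypothetical sketch, not part of the generated header: emit the start of
 * an SDMA_PKT_COPY_STRUCT packet into an indirect buffer. op and sub_op are
 * caller-supplied because their enum values are defined outside this hunk. */
static void sketch_emit_copy_struct(uint32_t *ib, uint32_t op, uint32_t sub_op,
                                    uint64_t sb_addr, uint32_t start_index)
{
        /* HEADER word (dword 0): opcode, sub-opcode, detile direction */
        ib[SDMA_PKT_COPY_STRUCT_HEADER_op_offset] =
                SDMA_PKT_COPY_STRUCT_HEADER_OP(op) |
                SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(sub_op) |
                SDMA_PKT_COPY_STRUCT_HEADER_DETILE(1);
        /* SB_ADDR_LO / SB_ADDR_HI (dwords 1 and 2): 64-bit structure buffer address */
        ib[SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset] =
                SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0((uint32_t)sb_addr);
        ib[SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset] =
                SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32((uint32_t)(sb_addr >> 32));
        /* START_INDEX word (dword 3) */
        ib[SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset] =
                SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(start_index);
}

(The remaining words of the packet, COUNT and DW_5 below, follow the same pattern.)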
+/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4 +#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0 +#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift) + +/*define for DW_5 word*/ +/*define for stride field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16 +#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift) + +/*define for struct_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_UNTILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define 
SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24 +#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4 +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18 +#define 
SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF +#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define 
SDMA_PKT_WRITE_TILED_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF +#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << 
SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003 +#define 
SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2 +#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF +#define 
SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18 +#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define 
SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7 +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift) + +/*define for pte_size field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift) + +/*define for direction field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define 
SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_BIT_FOR_DW word*/ +/*define for mask_first_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift) + +/*define for mask_last_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift) + +/*define for COUNT_IN_32B_XFER word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_RMW packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007 +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16 +#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift) + 
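(Continuing the earlier sketch: multi-field header words are composed by OR-ing the packing macros together. The example below builds the SDMA_PKT_PTEPDE_COPY_BACKWARDS header dword from the field macros defined above; it is a hedged illustration only, with op/sub_op and the pte_size/direction/ptepde_op encodings treated as opaque caller-supplied values, since this hunk does not spell out their numeric meanings.)

/* Hypothetical sketch, not part of the generated header: compose the
 * PTEPDE_COPY_BACKWARDS header word. uint32_t as in the earlier sketch. */
static uint32_t sketch_ptepde_copy_backwards_header(uint32_t op, uint32_t sub_op,
                                                    uint32_t pte_size,
                                                    uint32_t direction,
                                                    uint32_t ptepde_op)
{
        return SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(op) |
               SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(sub_op) |
               SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(pte_size) |
               SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(direction) |
               SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(ptepde_op);
}

(Note that because every packing macro masks its argument, an out-of-range value is silently truncated rather than rejected; callers are expected to pass already-valid field encodings.)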
+/*define for gcc field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24 +#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift) + +/*define for MASK_LO word*/ +/*define for mask_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift) + +/*define for MASK_HI word*/ +/*define for mask_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift) + +/*define for VALUE_LO word*/ +/*define for value_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << 
SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift) + +/*define for VALUE_HI word*/ +/*define for value_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_INCR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift) + +/*define for INIT_DW0 word*/ +/*define for init_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift) + +/*define for INIT_DW1 word*/ +/*define for init_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & 
SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift) + +/*define for INCR_DW0 word*/ +/*define for incr_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift) + +/*define for INCR_DW1 word*/ +/*define for incr_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9 +#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_INDIRECT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_INDIRECT_HEADER_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_op_shift 0 +#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8 +#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift) + +/*define for vmid field*/ +#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F +#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16 +#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift) + +/*define for priv field*/ +#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001 +#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31 +#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift) + +/*define for BASE_LO word*/ +/*define for ib_base_31_0 field*/ +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1 +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0 +#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift) + +/*define for BASE_HI word*/ +/*define for ib_base_63_32 field*/ +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2 +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0 +#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift) + +/*define for IB_SIZE word*/ +/*define for ib_size field*/ +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3 +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0 +#define 
SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift) + +/*define for CSA_ADDR_LO word*/ +/*define for csa_addr_31_0 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift) + +/*define for CSA_ADDR_HI word*/ +/*define for csa_addr_63_32 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_SEMAPHORE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0 +#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift) + +/*define for write_one field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29 +#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift) + +/*define for signal field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30 +#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift) + +/*define for mailbox field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31 +#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_FENCE packet +*/ + +/*define for 
HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_FENCE_HEADER_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_op_shift 0 +#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8 +#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_FENCE_HEADER_mtype_offset 0 +#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007 +#define SDMA_PKT_FENCE_HEADER_mtype_shift 16 +#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift) + +/*define for gcc field*/ +#define SDMA_PKT_FENCE_HEADER_gcc_offset 0 +#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_gcc_shift 19 +#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_FENCE_HEADER_sys_offset 0 +#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_sys_shift 20 +#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_FENCE_HEADER_snp_offset 0 +#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_snp_shift 22 +#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_FENCE_HEADER_gpa_offset 0 +#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_gpa_shift 23 +#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0 +#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24 +#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_FENCE_DATA_data_offset 3 +#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_DATA_data_shift 0 +#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_SRBM_WRITE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0 +#define 
SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift) + +/*define for byte_en field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28 +#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift) + +/*define for ADDR word*/ +/*define for addr field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0 +#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift) + +/*define for apertureid field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20 +#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2 +#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0 +#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_PRE_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0 +#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift) + +/*define for dev_sel field*/ +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16 +#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_COND_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COND_EXE_HEADER_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_op_shift 0 
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift) + +/*define for REFERENCE word*/ +/*define for reference field*/ +#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3 +#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0 +#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_CONSTANT_FILL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift) + +/*define for sw field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift) + +/*define for fillsize field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30 +#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define 
SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DATA word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3 +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4 +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0 +#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_DATA_FILL_MULTI packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift) + +/*define for memlog_clr field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift) + +/*define for BYTE_STRIDE word*/ +/*define for byte_stride field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift) + +/*define for DMA_COUNT word*/ +/*define for dma_count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0 +#define 
SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for BYTE_COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REGMEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift) + +/*define for hdp_flush field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26 +#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift) + +/*define for func field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28 +#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift) + +/*define for mem_poll field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31 +#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0 +#define 
SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift) + +/*define for VALUE word*/ +/*define for value field*/ +#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3 +#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0 +#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift) + +/*define for MASK word*/ +/*define for mask field*/ +#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4 +#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0 +#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift) + +/*define for DW5 word*/ +/*define for interval field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF +#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0 +#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift) + +/*define for retry_count field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16 +#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift) + +/*define for SRC_ADDR word*/ +/*define for addr_31_2 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) 
(((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift) + +/*define for ea field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + +/*define for START_PAGE word*/ +/*define for addr_31_4 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift) + +/*define for PAGE_NUM word*/ +/*define for page_num_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & 
SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift) + + +/* +** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift) + +/*define for mode field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift) + +/*define for PATTERN word*/ +/*define for pattern field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift) + +/*define for CMP0_ADDR_START_LO word*/ +/*define for cmp0_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift) + +/*define for CMP0_ADDR_START_HI word*/ +/*define for cmp0_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift) + +/*define for CMP0_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_offset 4 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP0_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_offset 5 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift 0 
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for CMP1_ADDR_START_LO word*/ +/*define for cmp1_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift) + +/*define for CMP1_ADDR_START_HI word*/ +/*define for cmp1_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift) + +/*define for CMP1_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP1_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for REC_ADDR_LO word*/ +/*define for rec_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift) + +/*define for REC_ADDR_HI word*/ +/*define for rec_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift) + +/*define for RESERVED word*/ +/*define for reserved field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & 
SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift) + + +/* +** Definitions for SDMA_PKT_ATOMIC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_ATOMIC_HEADER_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_ATOMIC_HEADER_op_shift 0 +#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift) + +/*define for loop field*/ +#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16 +#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift) + +/*define for tmz field*/ +#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18 +#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift) + +/*define for atomic_op field*/ +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25 +#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift) + +/*define for SRC_DATA_LO word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift) + +/*define for SRC_DATA_HI word*/ +/*define for src_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift) + +/*define for CMP_DATA_LO word*/ +/*define for cmp_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift) + +/*define for CMP_DATA_HI word*/ +/*define for cmp_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6 +#define 
SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift) + +/*define for LOOP_INTERVAL word*/ +/*define for loop_interval field*/ +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_SET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift) + +/*define for INIT_DATA_LO word*/ +/*define for init_data_31_0 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift) + +/*define for INIT_DATA_HI word*/ +/*define for init_data_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) 
(((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << 
SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_DUMMY_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_GPUVM_INV packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0 +#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF +#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0 +#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0 +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8 +#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift) + +/*define for PAYLOAD1 word*/ +/*define for per_vmid_inv_req field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift) + +/*define for flush_type field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift) + +/*define for l2_ptes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift) + +/*define for l2_pde0 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift) + +/*define for l2_pde1 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001 +#define 
SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift) + +/*define for l2_pde2 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift) + +/*define for l1_ptes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift) + +/*define for clr_protection_fault_status_addr field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift) + +/*define for log_request field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift) + +/*define for four_kilobytes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift) + +/*define for PAYLOAD2 word*/ +/*define for s field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift) + +/*define for page_va_42_12 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift) + +/*define for PAYLOAD3 word*/ +/*define for page_va_47_43 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3 +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift) + + +/* +** Definitions for SDMA_PKT_GCR_REQ packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0 +#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF +#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0 
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0 +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8 +#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift) + +/*define for PAYLOAD1 word*/ +/*define for base_va_31_7 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1 +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7 +#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift) + +/*define for PAYLOAD2 word*/ +/*define for base_va_47_32 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift) + +/*define for gcr_control_15_0 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift) + +/*define for PAYLOAD3 word*/ +/*define for gcr_control_18_16 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift) + +/*define for limit_va_31_7 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift) + +/*define for PAYLOAD4 word*/ +/*define for limit_va_47_32 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift) + +/*define for vmid field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F +#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift) + + +/* +** Definitions for SDMA_PKT_NOP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_NOP_HEADER_op_offset 0 +#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_op_shift 0 +#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift) + +/*define for sub_op field*/ +#define 
SDMA_PKT_NOP_HEADER_sub_op_offset 0 +#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_sub_op_shift 8 +#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift) + +/*define for count field*/ +#define SDMA_PKT_NOP_HEADER_count_offset 0 +#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF +#define SDMA_PKT_NOP_HEADER_count_shift 16 +#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_NOP_DATA0_data0_offset 1 +#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_NOP_DATA0_data0_shift 0 +#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift) + + +/* +** Definitions for SDMA_AQL_PKT_HEADER packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0 +#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16 +#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift) + + +/* +** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define 
SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift) + +/*define for RETURN_ADDR_LO word*/ +/*define for return_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & 
SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift) + +/*define for RETURN_ADDR_HI word*/ +/*define for return_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define 
SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for RESERVED_DW10 word*/ +/*define for reserved_dw10 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift) + +/*define for RESERVED_DW11 word*/ +/*define for reserved_dw11 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +/* +** Definitions for SDMA_AQL_PKT_BARRIER_OR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0 +#define 
SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift) + +/*define for DEPENDENT_ADDR_0_LO word*/ +/*define for dependent_addr_0_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << 
SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift) + +/*define for DEPENDENT_ADDR_0_HI word*/ +/*define for dependent_addr_0_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift) + +/*define for DEPENDENT_ADDR_1_LO word*/ +/*define for dependent_addr_1_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift) + +/*define for DEPENDENT_ADDR_1_HI word*/ +/*define for dependent_addr_1_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift) + +/*define for DEPENDENT_ADDR_2_LO word*/ +/*define for dependent_addr_2_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift) + +/*define for DEPENDENT_ADDR_2_HI word*/ +/*define for dependent_addr_2_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift) + +/*define for DEPENDENT_ADDR_3_LO word*/ +/*define for dependent_addr_3_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << 
SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift) + +/*define for DEPENDENT_ADDR_3_HI word*/ +/*define for dependent_addr_3_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift) + +/*define for DEPENDENT_ADDR_4_LO word*/ +/*define for dependent_addr_4_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift) + +/*define for DEPENDENT_ADDR_4_HI word*/ +/*define for dependent_addr_4_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define 
SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +#endif /* __NAVI10_SDMA_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c new file mode 100644 index 000000000000..a786d159e5e9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "navi12_ip_offset.h" + +int navi12_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocks needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c new file mode 100644 index 000000000000..4ea1e8fbb601 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "navi14_ip_offset.h" + +int navi14_reg_base_init(struct amdgpu_device *adev) +{ + int i; + + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c new file mode 100644 index 000000000000..f3a3fe746222 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -0,0 +1,343 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
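Both navi1x reg_base_init routines above fill adev->reg_offset[...][instance] with pointers into the per-instance base-address tables from the corresponding *_ip_offset.h header; register accessors later resolve an absolute offset by adding a register's per-segment offset to the base looked up in this table. A rough sketch of that lookup, using a hypothetical helper rather than the driver's actual SOC15 macros:

static uint32_t example_reg_offset(struct amdgpu_device *adev,
				   int hwip, int inst, int seg, uint32_t reg)
{
	/* reg_offset[hwip][inst] points at an array of segment base addresses;
	 * the register's dword offset is added to the base of its segment */
	return adev->reg_offset[hwip][inst][seg] + reg;
}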
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v2_3.h" + +#include "nbio/nbio_2_3_default.h" +#include "nbio/nbio_2_3_offset.h" +#include "nbio/nbio_2_3_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +#define smnPCIE_CONFIG_CNTL 0x11180044 +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 + + +static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbio_v2_3_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | + BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); +} + +static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); +} + +static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, + int doorbell_size) +{ + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_SDMA0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_SDMA0_DOORBELL_RANGE, SIZE, + doorbell_size); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_SDMA0_DOORBELL_RANGE, SIZE, + 0); + + WREG32(reg, doorbell_range); +} + +static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + +static void nbio_v2_3_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, + enable ? 
1 : 0); +} + +static void nbio_v2_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + tmp); +} + + +static void nbio_v2_3_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, SIZE, + 2); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, SIZE, + 0); + + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); +} + +static void nbio_v2_3_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); + /* + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); +} + +static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnCPM_CONTROL); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_PCIE(smnCPM_CONTROL, data); +} + +static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CNTL2); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + 
WREG32_PCIE(smnPCIE_CNTL2, data); +} + +static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE(smnCPM_CONTROL); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE(smnPCIE_CNTL2); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static u32 nbio_v2_3_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v2_3_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v2_3_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); +} + +static u32 nbio_v2_3_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); +} + +const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbio_v2_3_detect_hw_virt(struct amdgpu_device *adev) +{ + uint32_t reg; + + reg = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_IOV_FUNC_IDENTIFIER); + if (reg & 1) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + + if (reg & 0x80000000) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + + if (!reg) { + if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; + } +} + +static void nbio_v2_3_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); +} + +const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { + .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v2_3_get_pcie_data_offset, + .get_rev_id = nbio_v2_3_get_rev_id, + .mc_access_enable = nbio_v2_3_mc_access_enable, + .hdp_flush = nbio_v2_3_hdp_flush, + .get_memsize = nbio_v2_3_get_memsize, + .sdma_doorbell_range = nbio_v2_3_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v2_3_vcn_doorbell_range, + .enable_doorbell_aperture = nbio_v2_3_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v2_3_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v2_3_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v2_3_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep 
= nbio_v2_3_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v2_3_get_clockgating_state, + .ih_control = nbio_v2_3_ih_control, + .init_registers = nbio_v2_3_init_registers, + .detect_hw_virt = nbio_v2_3_detect_hw_virt, + .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h new file mode 100644 index 000000000000..a43b60acf7f6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -0,0 +1,32 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V2_3_H__ +#define __NBIO_V2_3_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index cc967dbfd631..635d9e1fc0a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -118,7 +118,8 @@ static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); - ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, SIZE, 6); } else ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); @@ -225,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { +const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -276,7 +277,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { - .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index 0743a6f016f3..6dc743b73218 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98ad2db3..d6cbf26074bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -29,9 +29,18 @@ #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) @@ -83,6 +91,26 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan WREG32(reg, doorbell_range); } +static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -264,7 +292,6 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { - .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, @@ -274,6 +301,7 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .hdp_flush = nbio_v7_0_hdp_flush, .get_memsize = nbio_v7_0_get_memsize, .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_0_ih_doorbell_range, @@ -283,4 +311,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .ih_control = nbio_v7_0_ih_control, .init_registers = nbio_v7_0_init_registers, .detect_hw_virt = nbio_v7_0_detect_hw_virt, + .remap_hdp_registers = 
nbio_v7_0_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h index 508d549c5029..e7aefb252550 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c69d51598cfe..0db458f9fafc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -23,13 +23,43 @@ #include "amdgpu.h" #include "amdgpu_atombios.h" #include "nbio_v7_4.h" +#include "amdgpu_ras.h" #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +/* + * These are nbio v7_4_1 registers mask. Temporarily define these here since + * nbio v7_4_1 header is incomplete. + */ +#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L + +#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc +#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 +//BIF_MMSCH1_DOORBELL_RANGE +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT 0x2 +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT 0x10 +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L + +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -53,10 +83,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) @@ -67,10 +96,24 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + u32 reg, doorbell_range; - u32 doorbell_range = RREG32(reg); + if (instance < 2) + reg = instance + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + else + /* + * These registers address of SDMA2~7 is not consecutive + * from SDMA0~1. Need plus 4 dwords offset. 
+ * + * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 + * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 + * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 + */ + reg = instance + 0x4 + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -81,6 +124,32 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan WREG32(reg, doorbell_range); } +static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg; + u32 doorbell_range; + + if (instance) + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE); + else + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -199,7 +268,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { +const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -212,6 +281,12 @@ static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, + .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, + .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, + .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, + .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, + .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, + .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, }; static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) @@ -233,17 +308,208 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { - uint32_t def, data; - def = data = RREG32_PCIE(smnPCIE_CI_CNTL); - data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1); +} - if (def != data) - WREG32_PCIE(smnPCIE_CI_CNTL, data); +static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + amdgpu_ras_global_ras_isr(adev); + } +} + +static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if 
(REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + amdgpu_ras_global_ras_isr(adev); + } +} + + +static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select + * vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for ras_controller_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select + * vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for err_event_athub_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. 
+ */ + return 0; +} + +static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = { + .set = nbio_v7_4_set_ras_controller_irq_state, + .process = nbio_v7_4_process_ras_controller_irq, +}; + +static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = { + .set = nbio_v7_4_set_ras_err_event_athub_irq_state, + .process = nbio_v7_4_process_err_event_athub_irq, +}; + +static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev) +{ + int r; + + /* init the irq funcs */ + adev->nbio.ras_controller_irq.funcs = + &nbio_v7_4_ras_controller_irq_funcs; + adev->nbio.ras_controller_irq.num_types = 1; + + /* register ras controller interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT, + &adev->nbio.ras_controller_irq); + if (r) + return r; + + return 0; +} + +static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev) +{ + + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbio_v7_4_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types = 1; + + /* register ras err event athub interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + if (r) + return r; + + return 0; +} + +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + uint32_t global_sts, central_sts, int_eoi; + uint32_t corr, fatal, non_fatal; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO); + corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr); + fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal); + non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, + ParityErrNonFatal); + + if (corr) + err_data->ce_count++; + if (fatal) + err_data->ue_count++; + + if (corr || fatal || non_fatal) { + central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS); + /* clear error status register */ + WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts); + + if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS, + BIFL_RasContller_Intr_Recv)) { + /* clear interrupt status register */ + WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts); + int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI); + int_eoi = REG_SET_FIELD(int_eoi, + IOHC_INTERRUPT_EOI, SMI_EOI, 1); + WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi); + } + } +} + +static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, + DOORBELL_INTERRUPT_DISABLE, enable ? 
0 : 1); } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { - .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset, @@ -253,13 +519,22 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .hdp_flush = nbio_v7_4_hdp_flush, .get_memsize = nbio_v7_4_get_memsize, .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, + .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt, .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating, .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep, .get_clockgating_state = nbio_v7_4_get_clockgating_state, .ih_control = nbio_v7_4_ih_control, .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, + .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, + .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt, + .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt, + .query_ras_error_count = nbio_v7_4_query_ras_error_count, + .ras_late_init = amdgpu_nbio_ras_late_init, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index c442865bac4f..b1ac82872752 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c new file mode 100644 index 000000000000..0ba66bef5746 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -0,0 +1,991 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "amdgpu_ih.h" +#include "amdgpu_uvd.h" +#include "amdgpu_vce.h" +#include "amdgpu_ucode.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "atom.h" +#include "amd_pcie.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "hdp/hdp_5_0_0_offset.h" +#include "hdp/hdp_5_0_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" + +#include "soc15.h" +#include "soc15_common.h" +#include "gmc_v10_0.h" +#include "gfxhub_v2_0.h" +#include "mmhub_v2_0.h" +#include "nbio_v2_3.h" +#include "nv.h" +#include "navi10_ih.h" +#include "gfx_v10_0.h" +#include "sdma_v5_0.h" +#include "vcn_v2_0.h" +#include "dce_virtual.h" +#include "mes_v10_1.h" +#include "mxgpu_nv.h" + +static const struct amd_ip_funcs nv_common_ip_funcs; + +/* + * Indirect registers accessor + */ +static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, v); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + r = RREG32(data); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + return r; +} + +static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + WREG32(data, (v)); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); +} + +static u32 nv_get_config_memsize(struct amdgpu_device *adev) +{ + return adev->nbio.funcs->get_memsize(adev); +} + +static u32 nv_get_xclk(struct amdgpu_device *adev) +{ + return adev->clock.spll.reference_freq; +} + + +void nv_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid) +{ + u32 grbm_gfx_cntl = 0; + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); +} + +static void nv_vga_set_state(struct amdgpu_device *adev, bool state) +{ + /* todo */ +} + +static bool nv_read_disabled_bios(struct amdgpu_device *adev) +{ + /* todo */ + return false; +} + +static bool nv_read_bios_from_rom(struct amdgpu_device 
*adev, + u8 *bios, u32 length_bytes) +{ + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; +} + +static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)}, +#if 0 /* TODO: will set it when SDMA header is available */ + { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)}, + { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)}, +#endif + { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, +}; + +static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + uint32_t val; + + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; +} + +static uint32_t nv_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + if (indexed) { + return nv_read_indexed_register(adev, se_num, sh_num, reg_offset); + } else { + if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) + return adev->gfx.config.gb_addr_config; + return RREG32(reg_offset); + } +} + +static int nv_read_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset, u32 *value) +{ + uint32_t i; + struct soc15_allowed_register_entry *en; + + *value = 0; + for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) { + en = &nv_allowed_read_registers[i]; + if (reg_offset != + (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset)) + continue; + + *value = nv_get_register_value(adev, + nv_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +} + +#if 0 +static void nv_gpu_pci_config_reset(struct amdgpu_device *adev) +{ + u32 i; + + dev_info(adev->dev, "GPU pci config reset\n"); + + /* disable BM */ + pci_clear_master(adev->pdev); + /* reset */ + amdgpu_pci_config_reset(adev); + + udelay(100); + + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + u32 memsize = nbio_v2_3_get_memsize(adev); + if (memsize != 0xffffffff) + break; + udelay(1); + } + +} +#endif + +static int nv_asic_mode1_reset(struct amdgpu_device *adev) +{ + u32 i; + 
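	/*
	 * Mode1 reset flow (descriptive note, derived from the code below):
	 * mark the engine hung in the ATOM scratch registers, quiesce PCI
	 * bus mastering and save config space, ask the PSP to perform the
	 * reset, then restore config space and poll the NBIO memsize
	 * register until the ASIC responds again.
	 */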
int ret = 0; + + amdgpu_atombios_scratch_regs_engine_hung(adev, true); + + dev_info(adev->dev, "GPU mode1 reset\n"); + + /* disable BM */ + pci_clear_master(adev->pdev); + + pci_save_state(adev->pdev); + + ret = psp_gpu_reset(adev); + if (ret) + dev_err(adev->dev, "GPU mode1 reset failed\n"); + + pci_restore_state(adev->pdev); + + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + u32 memsize = adev->nbio.funcs->get_memsize(adev); + + if (memsize != 0xffffffff) + break; + udelay(1); + } + + amdgpu_atombios_scratch_regs_engine_hung(adev, false); + + return ret; +} + +static enum amd_reset_method +nv_asic_reset_method(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; +} + +static int nv_asic_reset(struct amdgpu_device *adev) +{ + + /* FIXME: it doesn't work since vega10 */ +#if 0 + amdgpu_atombios_scratch_regs_engine_hung(adev, true); + + nv_gpu_pci_config_reset(adev); + + amdgpu_atombios_scratch_regs_engine_hung(adev, false); +#endif + int ret = 0; + struct smu_context *smu = &adev->smu; + + if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + ret = smu_baco_reset(smu); + } else { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + ret = nv_asic_mode1_reset(adev); + } + + return ret; +} + +static int nv_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) +{ + /* todo */ + return 0; +} + +static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) +{ + /* todo */ + return 0; +} + +static void nv_pcie_gen3_enable(struct amdgpu_device *adev) +{ + if (pci_is_root_bus(adev->pdev->bus)) + return; + + if (amdgpu_pcie_gen2 == 0) + return; + + if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) + return; + + /* todo */ +} + +static void nv_program_aspm(struct amdgpu_device *adev) +{ + + if (amdgpu_aspm == 0) + return; + + /* todo */ +} + +static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); +} + +static const struct amdgpu_ip_block_version nv_common_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &nv_common_ip_funcs, +}; + +static int nv_reg_base_init(struct amdgpu_device *adev) +{ + int r; + + if (amdgpu_discovery) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); + goto legacy_init; + } + + return 0; + } + +legacy_init: + switch (adev->asic_type) { + case CHIP_NAVI10: + navi10_reg_base_init(adev); + break; + case CHIP_NAVI14: + navi14_reg_base_init(adev); + break; + case CHIP_NAVI12: + navi12_reg_base_init(adev); + break; + default: + return -EINVAL; + } + + return 0; +} + +int nv_set_ip_blocks(struct amdgpu_device *adev) +{ + int r; + + /* Set IP register base before any HW register access */ + r = nv_reg_base_init(adev); + if (r) + return r; + + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + + adev->nbio.funcs->detect_hw_virt(adev); + + if (amdgpu_sriov_vf(adev)) + adev->virt.ops = &xgpu_nv_virt_ops; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + 
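		/*
		 * IP blocks are added in the order they will be initialized:
		 * common, GMC, IH and PSP first, then the SMU (early when
		 * firmware is loaded through the PSP, after SDMA for direct
		 * loading), display (virtual or DC), GFX, SDMA, VCN and,
		 * when enabled, MES.
		 */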
amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + if (adev->enable_mes) + amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); + break; + case CHIP_NAVI12: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + break; + default: + return -EINVAL; + } + + return 0; +} + +static uint32_t nv_get_rev_id(struct amdgpu_device *adev) +{ + return adev->nbio.funcs->get_rev_id(adev); +} + +static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + adev->nbio.funcs->hdp_flush(adev, ring); +} + +static void nv_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + } else { + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); + } +} + +static bool nv_need_full_reset(struct amdgpu_device *adev) +{ + return true; +} + +static void nv_get_pcie_usage(struct amdgpu_device *adev, + uint64_t *count0, + uint64_t *count1) +{ + /*TODO*/ +} + +static bool nv_need_reset_on_init(struct amdgpu_device *adev) +{ +#if 0 + u32 sol_reg; + + if (adev->flags & AMD_IS_APU) + return false; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. 
+ */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return true; +#endif + /* TODO: re-enable it when mode1 reset is functional */ + return false; +} + +static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) +{ + + /* TODO + * dummy implement for pcie_replay_count sysfs interface + * */ + + return 0; +} + +static void nv_init_doorbell_index(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0; + adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1; + adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7; + adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP; + + adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1; + adev->doorbell_index.sdma_doorbell_range = 20; +} + +static const struct amdgpu_asic_funcs nv_asic_funcs = +{ + .read_disabled_bios = &nv_read_disabled_bios, + .read_bios_from_rom = &nv_read_bios_from_rom, + .read_register = &nv_read_register, + .reset = &nv_asic_reset, + .reset_method = &nv_asic_reset_method, + .set_vga_state = &nv_vga_set_state, + .get_xclk = &nv_get_xclk, + .set_uvd_clocks = &nv_set_uvd_clocks, + .set_vce_clocks = &nv_set_vce_clocks, + .get_config_memsize = &nv_get_config_memsize, + .flush_hdp = &nv_flush_hdp, + .invalidate_hdp = &nv_invalidate_hdp, + .init_doorbell_index = &nv_init_doorbell_index, + .need_full_reset = &nv_need_full_reset, + .get_pcie_usage = &nv_get_pcie_usage, + .need_reset_on_init = &nv_need_reset_on_init, + .get_pcie_replay_count = &nv_get_pcie_replay_count, +}; + +static int nv_common_early_init(void *handle) +{ +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + adev->smc_rreg = NULL; + adev->smc_wreg = NULL; + adev->pcie_rreg = &nv_pcie_rreg; + adev->pcie_wreg = &nv_pcie_wreg; + + /* TODO: will add them during VCN v2 implementation */ + adev->uvd_ctx_rreg = NULL; + adev->uvd_ctx_wreg = NULL; + + adev->didt_rreg = &nv_didt_rreg; + adev->didt_wreg = &nv_didt_wreg; + + adev->asic_funcs = &nv_asic_funcs; + + adev->rev_id = 
nv_get_rev_id(adev); + adev->external_rev_id = 0xff; + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_ATHUB; + adev->external_rev_id = adev->rev_id + 0x1; + break; + case CHIP_NAVI14: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 20; + break; + case CHIP_NAVI12: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_ATHUB; + adev->external_rev_id = adev->rev_id + 0xa; + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } + + return 0; +} + +static int nv_common_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + + return 0; +} + +static int nv_common_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + + return 0; +} + +static int nv_common_sw_fini(void *handle) +{ + return 0; +} + +static int nv_common_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* enable pcie gen2/3 link */ + nv_pcie_gen3_enable(adev); + /* enable aspm */ + nv_program_aspm(adev); + /* setup nbio registers */ + adev->nbio.funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); + /* enable the doorbell aperture */ + nv_enable_doorbell_aperture(adev, true); + + return 0; +} + +static int nv_common_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* disable the doorbell aperture */ + nv_enable_doorbell_aperture(adev, false); + + return 0; +} + +static int nv_common_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return nv_common_hw_fini(adev); +} + +static int nv_common_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return nv_common_hw_init(adev); +} + 
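/*
 * nv_common_suspend()/nv_common_resume() above simply reuse the
 * hw_fini()/hw_init() paths: everything this IP block programs
 * (doorbell apertures, ASPM, HDP remap, NBIO registers) is rewritten
 * from scratch on hw_init().  The sketch below shows roughly how such
 * per-IP callbacks are reached through the amd_ip_funcs table that
 * each amdgpu_ip_block carries.  It is only an illustration; the
 * function name is made up and the real iteration lives in
 * amdgpu_device.c, not in this file.
 */
static int example_suspend_all_ip_blocks(struct amdgpu_device *adev)
{
	int i, r;

	/* tear down in reverse of the order the blocks were added */
	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		const struct amd_ip_funcs *funcs = adev->ip_blocks[i].version->funcs;

		if (!adev->ip_blocks[i].status.valid || !funcs->suspend)
			continue;

		r = funcs->suspend(adev);
		if (r) {
			dev_err(adev->dev, "suspend of IP block <%s> failed %d\n",
				funcs->name, r);
			return r;
		}
	}

	return 0;
}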
+static bool nv_common_is_idle(void *handle) +{ + return true; +} + +static int nv_common_wait_for_idle(void *handle) +{ + return 0; +} + +static int nv_common_soft_reset(void *handle) +{ + return 0; +} + +static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, + * forced on IPH & RC clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + IPH_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); + + /* HDP 5.0 doesn't support dynamic power mode switch, + * disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, enable); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, enable); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, enable); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, enable); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, enable); + /* RC should not use shut down mode, fallback to ds */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, enable); + } + + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* restore IPH & RC clock override after clock/power mode changing */ + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); +} + +static void nv_update_hdp_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + 
HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); +} + +static int nv_common_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->nbio.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + adev->nbio.funcs->update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + nv_update_hdp_mem_power_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + nv_update_hdp_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static int nv_common_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* TODO */ + return 0; +} + +static void nv_common_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t tmp; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + adev->nbio.funcs->get_clockgating_state(adev, flags); + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_SD_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_SD; + + return; +} + +static const struct amd_ip_funcs nv_common_ip_funcs = { + .name = "nv_common", + .early_init = nv_common_early_init, + .late_init = nv_common_late_init, + .sw_init = nv_common_sw_init, + .sw_fini = nv_common_sw_fini, + .hw_init = nv_common_hw_init, + .hw_fini = nv_common_hw_fini, + .suspend = nv_common_suspend, + .resume = nv_common_resume, + .is_idle = nv_common_is_idle, + .wait_for_idle = nv_common_wait_for_idle, + .soft_reset = nv_common_soft_reset, + .set_clockgating_state = nv_common_set_clockgating_state, + .set_powergating_state = nv_common_set_powergating_state, + .get_clockgating_state = nv_common_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h new file mode 100644 index 000000000000..82e6cb432f3d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -0,0 +1,35 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NV_H__ +#define __NV_H__ + +#include "nbio_v2_3.h" + +void nv_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid); +int nv_set_ip_blocks(struct amdgpu_device *adev); +int navi10_reg_base_init(struct amdgpu_device *adev); +int navi14_reg_base_init(struct amdgpu_device *adev); +int navi12_reg_base_init(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h new file mode 100644 index 000000000000..1de984647dbb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -0,0 +1,418 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef NVD_H +#define NVD_H + +/** + * Navi's PM4 definitions + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + ((reg) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_SET_BASE 0x11 +#define PACKET3_BASE_INDEX(x) ((x) << 0) +#define CE_PARTITION_BASE 3 +#define PACKET3_CLEAR_STATE 0x12 +#define PACKET3_INDEX_BUFFER_SIZE 0x13 +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_DISPATCH_INDIRECT 0x16 +#define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 +#define PACKET3_ATOMIC_GDS 0x1D +#define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_OCCLUSION_QUERY 0x1F +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_DRAW_INDIRECT 0x24 +#define PACKET3_DRAW_INDEX_INDIRECT 0x25 +#define PACKET3_INDEX_BASE 0x26 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 +#define PACKET3_INDIRECT_BUFFER_PRIV 0x32 +#define PACKET3_INDIRECT_BUFFER_CNST 0x33 +#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33 +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 +#define PACKET3_DRAW_PREAMBLE 0x36 +#define PACKET3_WRITE_DATA 0x37 +#define WRITE_DATA_DST_SEL(x) ((x) << 8) + /* 0 - register + * 1 - memory (sync - via GRBM) + * 2 - gl2 + * 3 - gds + * 4 - reserved + * 5 - memory (async - direct) + */ +#define WR_ONE_ADDR (1 << 16) +#define WR_CONFIRM (1 << 20) +#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) + /* 0 - me + * 1 - pfp + * 2 - ce + */ +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +# define PACKET3_SEM_USE_MAILBOX (0x1 << 16) +# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ +# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) +# define PACKET3_SEM_SEL_WAIT (0x7 << 29) +#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A +#define PACKET3_COPY_DW 0x3B +#define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_OPERATION(x) ((x) << 6) + /* 0 - wait_reg_mem + * 1 - wr_wait_wr_reg + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ +#define PACKET3_INDIRECT_BUFFER 0x3F +#define INDIRECT_BUFFER_VALID (1 << 23) +#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) + /* 0 - LRU + * 1 - Stream + * 2 - Bypass + */ +#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) +#define INDIRECT_BUFFER_PRE_RESUME(x) 
((x) << 30) +#define PACKET3_COND_INDIRECT_BUFFER 0x3F +#define PACKET3_COPY_DATA 0x40 +#define PACKET3_CP_DMA 0x41 +#define PACKET3_PFP_SYNC_ME 0x42 +#define PACKET3_SURFACE_SYNC 0x43 +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_* + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + */ +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define PACKET3_EVENT_WRITE_EOS 0x48 +#define PACKET3_RELEASE_MEM 0x49 +#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0) +#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8) +#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12) +#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13) +#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14) +#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15) +#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16) +#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17) +#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19) +#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20) +#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21) +#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22) +#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25) + /* 0 - cache_policy__me_release_mem__lru + * 1 - cache_policy__me_release_mem__stream + * 2 - cache_policy__me_release_mem__noa + * 3 - cache_policy__me_release_mem__bypass + */ +#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28) + +#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit GPU counter value + * 4 - send 64bit sys counter value + */ +#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ +#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) + /* 0 - MC + * 1 - TC/L2 + */ + + + +#define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) +#define PACKET3_DMA_DATA 0x50 +/* 1. header + * 2. CONTROL + * 3. SRC_ADDR_LO or DATA [31:0] + * 4. SRC_ADDR_HI [31:0] + * 5. DST_ADDR_LO [31:0] + * 6. DST_ADDR_HI [7:0] + * 7. 
COMMAND [31:26] | BYTE_COUNT [25:0] + */ +/* CONTROL */ +# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) + /* 0 - ME + * 1 - PFP + */ +# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) + /* 0 - DST_ADDR using DAS + * 1 - GDS + * 3 - DST_ADDR using L2 + */ +# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) + /* 0 - SRC_ADDR using SAS + * 1 - GDS + * 2 - DATA + * 3 - SRC_ADDR using L2 + */ +# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) +# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) +# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) +#define PACKET3_CONTEXT_REG_RMW 0x51 +#define PACKET3_GFX_CNTX_UPDATE 0x52 +#define PACKET3_BLK_CNTX_UPDATE 0x53 +#define PACKET3_INCR_UPDT_STATE 0x55 +#define PACKET3_ACQUIRE_MEM 0x58 +#define PACKET3_REWIND 0x59 +#define PACKET3_INTERRUPT 0x5A +#define PACKET3_GEN_PDEPTE 0x5B +#define PACKET3_INDIRECT_BUFFER_PASID 0x5C +#define PACKET3_PRIME_UTCL2 0x5D +#define PACKET3_LOAD_UCONFIG_REG 0x5E +#define PACKET3_LOAD_SH_REG 0x5F +#define PACKET3_LOAD_CONFIG_REG 0x60 +#define PACKET3_LOAD_CONTEXT_REG 0x61 +#define PACKET3_LOAD_COMPUTE_STATE 0x62 +#define PACKET3_LOAD_SH_REG_INDEX 0x63 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_START 0x00002000 +#define PACKET3_SET_CONFIG_REG_END 0x00002c00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_START 0x0000a000 +#define PACKET3_SET_CONTEXT_REG_END 0x0000a400 +#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A +#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71 +#define PACKET3_SET_SH_REG_DI 0x72 +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 +#define PACKET3_SET_SH_REG_DI_MULTI 0x74 +#define PACKET3_GFX_PIPE_LOCK 0x75 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x00002c00 +#define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_SET_QUEUE_REG 0x78 +#define PACKET3_SET_UCONFIG_REG 0x79 +#define PACKET3_SET_UCONFIG_REG_START 0x0000c000 +#define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A +#define PACKET3_FORWARD_HEADER 0x7C +#define PACKET3_SCRATCH_RAM_WRITE 0x7D +#define PACKET3_SCRATCH_RAM_READ 0x7E +#define PACKET3_LOAD_CONST_RAM 0x80 +#define PACKET3_WRITE_CONST_RAM 0x81 +#define PACKET3_DUMP_CONST_RAM 0x83 +#define PACKET3_INCREMENT_CE_COUNTER 0x84 +#define PACKET3_INCREMENT_DE_COUNTER 0x85 +#define PACKET3_WAIT_ON_CE_COUNTER 0x86 +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 +#define PACKET3_SWITCH_BUFFER 0x8B +#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C +#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C +#define PACKET3_DISPATCH_DRAW 0x8D +#define PACKET3_DISPATCH_DRAW_ACE 0x8D +#define PACKET3_GET_LOD_STATS 0x8E +#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F +#define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_CMD(x) ((x) << 28) + /* + * x=0: tmz_begin + * x=1: tmz_end + */ +#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91 +#define PACKET3_WAIT_REG_MEM64 0x93 +#define PACKET3_COND_PREEMPT 0x94 +#define PACKET3_HDP_FLUSH 0x95 +#define PACKET3_COPY_DATA_RB 0x96 +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) 
((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +#define PACKET3_AQL_PACKET 0x99 +#define PACKET3_DMA_DATA_FILL_MULTI 0x9A +#define PACKET3_SET_SH_REG_INDEX 0x9B +#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C +#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D +#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E +#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F +#define PACKET3_SET_RESOURCES 0xA0 +/* 1. header + * 2. CONTROL + * 3. QUEUE_MASK_LO [31:0] + * 4. QUEUE_MASK_HI [31:0] + * 5. GWS_MASK_LO [31:0] + * 6. GWS_MASK_HI [31:0] + * 7. OAC_MASK [15:0] + * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] + */ +# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) +# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) +# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) +#define PACKET3_MAP_PROCESS 0xA1 +#define PACKET3_MAP_QUEUES 0xA2 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. MQD_ADDR_LO [31:0] + * 5. MQD_ADDR_HI [31:0] + * 6. WPTR_ADDR_LO [31:0] + * 7. WPTR_ADDR_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) +# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13) +# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) +# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) +# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) +# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2 */ +# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) +# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) +#define PACKET3_UNMAP_QUEUES 0xA3 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. CONTROL3 + * 5. CONTROL4 + * 6. CONTROL5 + */ +/* CONTROL */ +# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) + /* 0 - PREEMPT_QUEUES + * 1 - RESET_QUEUES + * 2 - DISABLE_PROCESS_QUEUES + * 3 - PREEMPT_QUEUES_NO_UNMAP + */ +# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2a */ +# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) +/* CONTROL3a */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) +/* CONTROL3b */ +# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) +/* CONTROL4 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) +/* CONTROL5 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) +#define PACKET3_QUERY_STATUS 0xA4 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. ADDR_LO [31:0] + * 5. ADDR_HI [31:0] + * 6. DATA_LO [31:0] + * 7. 
DATA_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) +# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) +# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) +/* CONTROL2a */ +# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_LIST 0xA5 +#define PACKET3_MAP_PROCESS_VM 0xA6 + + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index f3a7d207af07..74a9fe8e0cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -43,6 +43,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ + GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ @@ -79,6 +80,18 @@ struct psp_gfx_ctrl */ #define GFX_FLAG_RESPONSE 0x80000000 +/* Gbr IH registers ID */ +enum ih_reg_id { + IH_RB = 0, // IH_RB_CNTL + IH_RB_RNG1 = 1, // IH_RB_CNTL_RING1 + IH_RB_RNG2 = 2, // IH_RB_CNTL_RING2 +}; + +/* Command to setup Gibraltar IH register */ +struct psp_gfx_cmd_gbr_ih_reg { + uint32_t reg_value; /* Value to be set to the IH_RB_CNTL... register*/ + enum ih_reg_id reg_id; /* ID of the register */ +}; /* TEE Gfx Command IDs for the ring buffer interface. */ enum psp_gfx_cmd_id @@ -93,9 +106,12 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ + GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ + /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ + GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ + GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ }; - /* Command to load Trusted Application binary into PSP OS. */ struct psp_gfx_cmd_load_ta { @@ -167,33 +183,66 @@ struct psp_gfx_cmd_setup_tmr /* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. 
*/ -enum psp_gfx_fw_type -{ - GFX_FW_TYPE_NONE = 0, - GFX_FW_TYPE_CP_ME = 1, - GFX_FW_TYPE_CP_PFP = 2, - GFX_FW_TYPE_CP_CE = 3, - GFX_FW_TYPE_CP_MEC = 4, - GFX_FW_TYPE_CP_MEC_ME1 = 5, - GFX_FW_TYPE_CP_MEC_ME2 = 6, - GFX_FW_TYPE_RLC_V = 7, - GFX_FW_TYPE_RLC_G = 8, - GFX_FW_TYPE_SDMA0 = 9, - GFX_FW_TYPE_SDMA1 = 10, - GFX_FW_TYPE_DMCU_ERAM = 11, - GFX_FW_TYPE_DMCU_ISR = 12, - GFX_FW_TYPE_VCN = 13, - GFX_FW_TYPE_UVD = 14, - GFX_FW_TYPE_VCE = 15, - GFX_FW_TYPE_ISP = 16, - GFX_FW_TYPE_ACP = 17, - GFX_FW_TYPE_SMU = 18, - GFX_FW_TYPE_MMSCH = 19, - GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, - GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, - GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, - GFX_FW_TYPE_UVD1 = 23, - GFX_FW_TYPE_MAX = 24 +enum psp_gfx_fw_type { + GFX_FW_TYPE_NONE = 0, /* */ + GFX_FW_TYPE_CP_ME = 1, /* CP-ME VG + RV */ + GFX_FW_TYPE_CP_PFP = 2, /* CP-PFP VG + RV */ + GFX_FW_TYPE_CP_CE = 3, /* CP-CE VG + RV */ + GFX_FW_TYPE_CP_MEC = 4, /* CP-MEC FW VG + RV */ + GFX_FW_TYPE_CP_MEC_ME1 = 5, /* CP-MEC Jump Table 1 VG + RV */ + GFX_FW_TYPE_CP_MEC_ME2 = 6, /* CP-MEC Jump Table 2 VG */ + GFX_FW_TYPE_RLC_V = 7, /* RLC-V VG */ + GFX_FW_TYPE_RLC_G = 8, /* RLC-G VG + RV */ + GFX_FW_TYPE_SDMA0 = 9, /* SDMA0 VG + RV */ + GFX_FW_TYPE_SDMA1 = 10, /* SDMA1 VG */ + GFX_FW_TYPE_DMCU_ERAM = 11, /* DMCU-ERAM VG + RV */ + GFX_FW_TYPE_DMCU_ISR = 12, /* DMCU-ISR VG + RV */ + GFX_FW_TYPE_VCN = 13, /* VCN RV */ + GFX_FW_TYPE_UVD = 14, /* UVD VG */ + GFX_FW_TYPE_VCE = 15, /* VCE VG */ + GFX_FW_TYPE_ISP = 16, /* ISP RV */ + GFX_FW_TYPE_ACP = 17, /* ACP RV */ + GFX_FW_TYPE_SMU = 18, /* SMU VG */ + GFX_FW_TYPE_MMSCH = 19, /* MMSCH VG */ + GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, /* RLC GPM VG + RV */ + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, /* RLC SRM VG + RV */ + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, /* RLC CNTL VG + RV */ + GFX_FW_TYPE_UVD1 = 23, /* UVD1 VG-20 */ + GFX_FW_TYPE_TOC = 24, /* TOC NV-10 */ + GFX_FW_TYPE_RLC_P = 25, /* RLC P NV */ + GFX_FW_TYPE_RLX6 = 26, /* RLX6 NV */ + GFX_FW_TYPE_GLOBAL_TAP_DELAYS = 27, /* GLOBAL TAP DELAYS NV */ + GFX_FW_TYPE_SE0_TAP_DELAYS = 28, /* SE0 TAP DELAYS NV */ + GFX_FW_TYPE_SE1_TAP_DELAYS = 29, /* SE1 TAP DELAYS NV */ + GFX_FW_TYPE_GLOBAL_SE0_SE1_SKEW_DELAYS = 30, /* GLOBAL SE0/1 SKEW DELAYS NV */ + GFX_FW_TYPE_SDMA0_JT = 31, /* SDMA0 JT NV */ + GFX_FW_TYPE_SDMA1_JT = 32, /* SDNA1 JT NV */ + GFX_FW_TYPE_CP_MES = 33, /* CP MES NV */ + GFX_FW_TYPE_MES_STACK = 34, /* MES STACK NV */ + GFX_FW_TYPE_RLC_SRM_DRAM_SR = 35, /* RLC SRM DRAM NV */ + GFX_FW_TYPE_RLCG_SCRATCH_SR = 36, /* RLCG SCRATCH NV */ + GFX_FW_TYPE_RLCP_SCRATCH_SR = 37, /* RLCP SCRATCH NV */ + GFX_FW_TYPE_RLCV_SCRATCH_SR = 38, /* RLCV SCRATCH NV */ + GFX_FW_TYPE_RLX6_DRAM_SR = 39, /* RLX6 DRAM NV */ + GFX_FW_TYPE_SDMA0_PG_CONTEXT = 40, /* SDMA0 PG CONTEXT NV */ + GFX_FW_TYPE_SDMA1_PG_CONTEXT = 41, /* SDMA1 PG CONTEXT NV */ + GFX_FW_TYPE_GLOBAL_MUX_SELECT_RAM = 42, /* GLOBAL MUX SEL RAM NV */ + GFX_FW_TYPE_SE0_MUX_SELECT_RAM = 43, /* SE0 MUX SEL RAM NV */ + GFX_FW_TYPE_SE1_MUX_SELECT_RAM = 44, /* SE1 MUX SEL RAM NV */ + GFX_FW_TYPE_ACCUM_CTRL_RAM = 45, /* ACCUM CTRL RAM NV */ + GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */ + GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */ + GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */ + GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_DMUB = 51, /* DMUB RN */ + GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */ + GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */ + GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */ + 
GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */ + GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ + GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ + GFX_FW_TYPE_MAX }; /* Command to load HW IP FW. */ @@ -216,6 +265,20 @@ struct psp_gfx_cmd_save_restore_ip_fw enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to setup register program */ +struct psp_gfx_cmd_reg_prog { + uint32_t reg_value; + uint32_t reg_id; +}; + +/* Command to load TOC */ +struct psp_gfx_cmd_load_toc +{ + uint32_t toc_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */ + uint32_t toc_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */ + uint32_t toc_size; /* FW buffer size in bytes */ +}; + /* All GFX ring buffer commands. */ union psp_gfx_commands { @@ -225,21 +288,24 @@ union psp_gfx_commands struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; + struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog; + struct psp_gfx_cmd_setup_tmr cmd_setup_vmr; + struct psp_gfx_cmd_load_toc cmd_load_toc; }; - /* Structure of GFX Response buffer. * For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI * it is separate buffer. */ struct psp_gfx_resp { - uint32_t status; /* +0 status of command execution */ - uint32_t session_id; /* +4 session ID in response to LoadTa command */ - uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */ - uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t status; /* +0 status of command execution */ + uint32_t session_id; /* +4 session ID in response to LoadTa command */ + uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t tmr_size; /* +16 size of the TMR to be reserved including MM fw and Gfx fw in response to cmd_load_toc command */ - uint32_t reserved[4]; + uint32_t reserved[3]; /* total 32 bytes */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 77c2bc344dfc..b345e69ba246 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -24,6 +24,9 @@ */ #include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" @@ -37,6 +40,9 @@ MODULE_FIRMWARE("amdgpu/raven_asd.bin"); MODULE_FIRMWARE("amdgpu/picasso_asd.bin"); MODULE_FIRMWARE("amdgpu/raven2_asd.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ta.bin"); +MODULE_FIRMWARE("amdgpu/raven2_ta.bin"); +MODULE_FIRMWARE("amdgpu/raven_ta.bin"); static int psp_v10_0_init_microcode(struct psp_context *psp) { @@ -45,7 +51,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) char fw_name[30]; int err = 0; const struct psp_firmware_header_v1_0 *hdr; - + const struct ta_firmware_header_v1_0 *ta_hdr; DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -76,7 +82,45 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) adev->psp.asd_start_addr = (uint8_t *)hdr + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v10.0: Failed 
to load firmware \"%s\"\n", + fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *) + adev->psp.ta_fw->data; + adev->psp.ta_hdcp_ucode_version = + le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); + adev->psp.ta_hdcp_ucode_size = + le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); + adev->psp.ta_hdcp_start_addr = + (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + + adev->psp.ta_dtm_ucode_version = + le32_to_cpu(ta_hdr->ta_dtm_ucode_version); + adev->psp.ta_dtm_ucode_size = + le32_to_cpu(ta_hdr->ta_dtm_size_bytes); + adev->psp.ta_dtm_start_addr = + (uint8_t *)adev->psp.ta_hdcp_start_addr + + le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + } + return 0; + +out2: + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; out: if (err) { dev_err(adev->dev, @@ -187,7 +231,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp, } static int psp_v10_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index) { @@ -226,6 +269,7 @@ static int psp_v10_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 860b70d80d3c..ffeaa2f5588d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -21,6 +21,8 @@ */ #include <linux/firmware.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" @@ -33,12 +35,31 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_7_4_offset.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); +MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 +/* navi10 reg offset define */ +#define mmRLC_GPM_UCODE_ADDR_NV10 0x5b61 +#define mmRLC_GPM_UCODE_DATA_NV10 0x5b62 +#define mmSDMA0_UCODE_ADDR_NV10 0x5880 +#define mmSDMA0_UCODE_DATA_NV10 0x5881 +/* memory training timeout define */ +#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 static int psp_v11_0_init_microcode(struct psp_context *psp) { @@ -47,6 +68,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) char fw_name[30]; int err = 0; const struct psp_firmware_header_v1_0 *sos_hdr; + const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; + const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_0 *asd_hdr; const struct ta_firmware_header_v1_0 *ta_hdr; @@ -56,6 +79,18 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_VEGA20: chip_name = "vega20"; break; + case CHIP_NAVI10: + chip_name = "navi10"; 
+ break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; default: BUG(); } @@ -70,15 +105,40 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out; sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) - - le32_to_cpu(sos_hdr->sos_size_bytes); - adev->psp.sys_start_addr = (uint8_t *)sos_hdr + + amdgpu_ucode_print_psp_hdr(&sos_hdr->header); + + switch (sos_hdr->header.header_version_major) { + case 1: + adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); + adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); + adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes); + adev->psp.sys_start_addr = (uint8_t *)sos_hdr + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); - adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + le32_to_cpu(sos_hdr->sos_offset_bytes); + if (sos_hdr->header.header_version_minor == 1) { + sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data; + adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes); + adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes); + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); + } + if (sos_hdr->header.header_version_minor == 2) { + sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); + } + break; + default: + dev_err(adev->dev, + "Unsupported psp sos firmware\n"); + err = -EINVAL; + goto out; + } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); @@ -96,23 +156,39 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) adev->psp.asd_start_addr = (uint8_t *)asd_hdr + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); - err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); - if (err) { - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; - dev_info(adev->dev, - "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); - } else { - err = amdgpu_ucode_validate(adev->psp.ta_fw); - if (err) - goto out2; - - ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + - le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = 
request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); + adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); + adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + + le32_to_cpu(ta_hdr->ta_ras_offset_bytes); + } + break; + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + break; + default: + BUG(); } return 0; @@ -132,20 +208,68 @@ out: return err; } +static bool psp_v11_0_is_sos_alive(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + return sol_reg != 0x0; +} + +static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v11_0_is_sos_alive(psp)) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); + return 0; + } + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP KDB binary to memory */ + memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); + + /* Provide the PSP KDB to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1*/ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + return ret; +} + static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) { int ret; uint32_t psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. 
*/ - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) { + if (psp_v11_0_is_sos_alive(psp)) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); - printk("sos fw version = 0x%x.\n", psp->sos_fw_version); + dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; } @@ -163,7 +287,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 1 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -181,13 +305,11 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) int ret; unsigned int psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) + if (psp_v11_0_is_sos_alive(psp)) return 0; /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ @@ -204,7 +326,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 2 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -217,6 +339,37 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; } +static void psp_v11_0_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + static int psp_v11_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -224,6 +377,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp, struct psp_ring *ring; struct amdgpu_device *adev = psp->adev; + psp_v11_0_reroute_ih(psp); + ring = &psp->km_ring; ring->ring_type = ring_type; @@ -250,6 +405,34 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp) return false; } +static int psp_v11_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v11_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + 
GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v11_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + static int psp_v11_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -259,6 +442,12 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if (psp_v11_0_support_vmr_ring(psp)) { + ret = psp_v11_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n"); + return ret; + } + /* Write low address of the ring to C2PMSG_102 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); @@ -278,6 +467,14 @@ static int psp_v11_0_ring_create(struct psp_context *psp, 0x80000000, 0x8000FFFF, false); } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for sOS ready for ring creation\n"); + return ret; + } + /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); @@ -303,33 +500,6 @@ static int psp_v11_0_ring_create(struct psp_context *psp, return ret; } -static int psp_v11_0_ring_stop(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct amdgpu_device *adev = psp->adev; - - /* Write the ring destroy command*/ - if (psp_v11_0_support_vmr_ring(psp)) - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); - else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, - GFX_CTRL_CMD_ID_DESTROY_RINGS); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) */ - if (psp_v11_0_support_vmr_ring(psp)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); - else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); - - return ret; -} static int psp_v11_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) @@ -350,7 +520,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp, } static int psp_v11_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index) { @@ -394,6 +563,7 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; @@ -456,14 +626,24 @@ psp_v11_0_sram_map(struct amdgpu_device *adev, case AMDGPU_UCODE_ID_RLC_G: *sram_offset = 0x2000; - *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); - *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + if (adev->asic_type < CHIP_NAVI10) { + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, 
mmRLC_GPM_UCODE_DATA); + } else { + *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_ADDR_NV10; + *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_DATA_NV10; + } break; case AMDGPU_UCODE_ID_SDMA0: *sram_offset = 0x0; - *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); - *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + if (adev->asic_type < CHIP_NAVI10) { + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + } else { + *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_ADDR_NV10; + *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_DATA_NV10; + } break; /* TODO: needs to confirm */ @@ -631,7 +811,7 @@ static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, for (i = 0; i < topology_info_input->num_nodes; i++) { topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; - topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + topology_info_input->nodes[i].is_sharing_enabled = 1; topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; } @@ -679,8 +859,218 @@ static int psp_v11_0_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id return 0; } +static int psp_v11_0_ras_trigger_error(struct psp_context *psp, + struct ta_ras_trigger_error_input *info) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; + ras_cmd->ras_in_message.trigger_error = *info; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + return ras_cmd->ras_status; +} + +static int psp_v11_0_ras_cure_posion(struct psp_context *psp, uint64_t *mode_ptr) +{ +#if 0 + // not support yet. + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__CURE_POISON; + ras_cmd->ras_in_message.cure_poison.mode_ptr = mode_ptr; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + return ras_cmd->ras_status; +#else + return -EINVAL; +#endif +} + +static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) +{ + return psp_rlc_autoload_start(psp); +} + +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) +{ + int ret; + int i; + uint32_t data_32; + int max_wait; + struct amdgpu_device *adev = psp->adev; + + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg); + + max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; + for (i = 0; i < max_wait; i++) { + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret == 0) + break; + } + if (i < max_wait) + ret = 0; + else + ret = -ETIME; + + DRM_DEBUG("training %s %s, cost %d @ %d ms\n", + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", + (ret == 0) ? 
"succeed" : "failed", + i, adev->usec_timeout/1000); + return ret; +} + +static void psp_v11_0_memory_training_fini(struct psp_context *psp) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; + kfree(ctx->sys_cache); + ctx->sys_cache = NULL; +} + +static int psp_v11_0_memory_training_init(struct psp_context *psp) +{ + int ret; + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + + if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { + DRM_DEBUG("memory training is not supported!\n"); + return 0; + } + + ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); + if (ctx->sys_cache == NULL) { + DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); + ret = -ENOMEM; + goto Err_out; + } + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); + ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; + return 0; + +Err_out: + psp_v11_0_memory_training_fini(psp); + return ret; +} + +/* + * save and restore proces + */ +static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) +{ + int ret; + uint32_t p2c_header[4]; + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + uint32_t *pcache = (uint32_t*)ctx->sys_cache; + + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + DRM_DEBUG("Memory training is not supported.\n"); + return 0; + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { + DRM_ERROR("Memory training initialization failure.\n"); + return -EINVAL; + } + + if (psp_v11_0_is_sos_alive(psp)) { + DRM_DEBUG("SOS is alive, skip memory training.\n"); + return 0; + } + + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", + pcache[0], pcache[1], pcache[2], pcache[3], + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + DRM_DEBUG("Short training depends on restore.\n"); + ops |= PSP_MEM_TRAIN_RESTORE; + } + + if ((ops & PSP_MEM_TRAIN_RESTORE) && + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + pcache[3] == p2c_header[3])) { + DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if ((ops & PSP_MEM_TRAIN_SAVE) && + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n"); + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; + } + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; + ops |= PSP_MEM_TRAIN_SAVE; + } + + DRM_DEBUG("Memory training ops:%x.\n", ops); + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + return ret; + } + } + + if (ops & PSP_MEM_TRAIN_SAVE) { + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); + } + + if (ops & PSP_MEM_TRAIN_RESTORE) { + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); + } + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + ret = psp_v11_0_memory_training_send_msg(psp, 
(amdgpu_force_long_training > 0) ? + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); + if (ret) { + DRM_ERROR("send training msg failed.\n"); + return ret; + } + } + ctx->training_cnt++; + return 0; +} + static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, + .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, .bootloader_load_sos = psp_v11_0_bootloader_load_sos, .ring_init = psp_v11_0_ring_init, @@ -695,6 +1085,12 @@ static const struct psp_funcs psp_v11_0_funcs = { .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id, .support_vmr_ring = psp_v11_0_support_vmr_ring, + .ras_trigger_error = psp_v11_0_ras_trigger_error, + .ras_cure_posion = psp_v11_0_ras_cure_posion, + .rlc_autoload_start = psp_v11_0_rlc_autoload_start, + .mem_training_init = psp_v11_0_memory_training_init, + .mem_training_fini = psp_v11_0_memory_training_fini, + .mem_training = psp_v11_0_memory_training, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c new file mode 100644 index 000000000000..8f553f6f92d6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -0,0 +1,566 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v12_0.h" + +#include "mp/mp_12_0_0_offset.h" +#include "mp/mp_12_0_0_sh_mask.h" +#include "gc/gc_9_0_offset.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "nbio/nbio_7_4_offset.h" + +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); +/* address block */ +#define smnMP1_FIRMWARE_FLAGS 0x3010024 + +static int psp_v12_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *asd_hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_RENOIR: + chip_name = "renoir"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out1; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out1; + + asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)asd_hdr + + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); + + return 0; + +out1: + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + + return err; +} + +static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); + return 0; + } + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP System Driver binary to memory */ + memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + + /* Provide the sys driver to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 1 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. 
+ */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return 0; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 2 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static void psp_v12_0_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + +static int psp_v12_0_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + psp_v12_0_reroute_ih(psp); + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static bool psp_v12_0_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + return false; +} + +static int psp_v12_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + 
GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v12_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v12_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v12_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v12_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v12_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static int psp_v12_0_cmd_submit(struct psp_context *psp, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + if (psp_v12_0_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring_buffer_start; + else + 
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); + write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + if (psp_v12_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + + return 0; +} + +static int +psp_v12_0_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch (ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + 
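/*
 * Illustrative sketch (not part of the patch above): psp_v12_0_sram_map()
 * resolves a ucode ID to an ADDR/DATA register pair plus an SRAM offset,
 * and the compare routine that follows writes the ADDR register once and
 * then reads the DATA register in a loop, checking each 32-bit word
 * against the firmware image held in system memory.  The helper below
 * restates that pattern outside the kernel: fw_matches_sram(),
 * reg_write32_t and reg_read32_t are hypothetical stand-ins for the
 * driver's WREG32/RREG32 accessors, and the assumption that the DATA
 * register steps through SRAM on every read mirrors how the driver's
 * compare loop behaves.
 */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Stand-ins for the driver's MMIO accessors (WREG32/RREG32). */
typedef void     (*reg_write32_t)(uint32_t reg, uint32_t val);
typedef uint32_t (*reg_read32_t)(uint32_t reg);

/*
 * Compare a firmware image in system memory with the copy loaded into
 * on-chip SRAM.  addr_reg selects the SRAM window; each read of data_reg
 * is assumed to return the next 32-bit word of that window.
 */
bool fw_matches_sram(reg_write32_t reg_write32, reg_read32_t reg_read32,
		     uint32_t addr_reg, uint32_t data_reg,
		     uint32_t sram_offset,
		     const uint32_t *ucode, size_t size_bytes)
{
	size_t i;

	reg_write32(addr_reg, sram_offset);		/* select the SRAM window */

	for (i = 0; i < size_bytes / 4; i++) {
		if (reg_read32(data_reg) != ucode[i])	/* word-by-word check */
			return false;
	}

	return true;
}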
+static bool psp_v12_0_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) +{ + int err = 0; + unsigned int fw_sram_reg_val = 0; + unsigned int fw_sram_addr_reg_offset = 0; + unsigned int fw_sram_data_reg_offset = 0; + unsigned int ucode_size; + uint32_t *ucode_mem = NULL; + struct amdgpu_device *adev = psp->adev; + + err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, + &fw_sram_data_reg_offset, ucode_type); + if (err) + return false; + + WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + + ucode_size = ucode->ucode_size; + ucode_mem = (uint32_t *)ucode->kaddr; + while (ucode_size) { + fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + + if (*ucode_mem != fw_sram_reg_val) + return false; + + ucode_mem++; + /* 4 bytes */ + ucode_size -= 4; + } + + return true; +} + +static int psp_v12_0_mode1_reset(struct psp_context *psp) +{ + int ret; + uint32_t offset; + struct amdgpu_device *adev = psp->adev; + + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); + + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + + if (ret) { + DRM_INFO("psp is not working correctly before mode1 reset!\n"); + return -EINVAL; + } + + /*send the mode 1 reset command*/ + WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); + + msleep(500); + + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); + + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + + if (ret) { + DRM_INFO("psp mode 1 reset failed!\n"); + return -EINVAL; + } + + DRM_INFO("psp mode1 reset succeed \n"); + + return 0; +} + +static const struct psp_funcs psp_v12_0_funcs = { + .init_microcode = psp_v12_0_init_microcode, + .bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv, + .bootloader_load_sos = psp_v12_0_bootloader_load_sos, + .ring_init = psp_v12_0_ring_init, + .ring_create = psp_v12_0_ring_create, + .ring_stop = psp_v12_0_ring_stop, + .ring_destroy = psp_v12_0_ring_destroy, + .cmd_submit = psp_v12_0_cmd_submit, + .compare_sram_data = psp_v12_0_compare_sram_data, + .mode1_reset = psp_v12_0_mode1_reset, +}; + +void psp_v12_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v12_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h new file mode 100644 index 000000000000..241693ab1990 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V12_0_H__ +#define __PSP_V12_0_H__ + +#include "amdgpu_psp.h" + +void psp_v12_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 0487e3a4e9e7..fdc00938327b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -24,7 +24,9 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" @@ -37,6 +39,9 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_6_1_offset.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); @@ -47,6 +52,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp); +static int psp_v3_1_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type); + static int psp_v3_1_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -146,7 +155,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 1 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -209,7 +218,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 2 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -252,6 +261,37 @@ static int psp_v3_1_ring_init(struct psp_context *psp, return 0; } +static void psp_v3_1_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + static int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -260,27 
+300,59 @@ static int psp_v3_1_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + psp_v3_1_reroute_ih(psp); + + if (psp_v3_1_support_vmr_ring(psp)) { + ret = psp_v3_1_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + /* No size initialization for sriov */ + /* Write the ring initialization command to C2PMSG_101 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + + /* there might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_101), 0x80000000, + 0x8000FFFF, false); + } else { + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_64), 0x80000000, + 0x8000FFFF, false); + } return ret; } @@ -291,16 +363,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - psp_ring_reg = 3 << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + if (psp_v3_1_support_vmr_ring(psp)) { 
+ /* Write the Destroy GPCOM ring command to C2PMSG_101 */ + psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } return ret; } @@ -324,7 +411,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp, } static int psp_v3_1_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index) { @@ -339,7 +425,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_v3_1_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -365,10 +454,17 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_v3_1_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + /* send interrupt to PSP for SRIOV ring write pointer update */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -538,6 +634,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) return 0; } +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev)) + return true; + + return false; +} + static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, @@ -550,6 +654,7 @@ static const struct psp_funcs psp_v3_1_funcs = { .compare_sram_data = psp_v3_1_compare_sram_data, .smu_reload_quirk = psp_v3_1_smu_reload_quirk, .mode1_reset = psp_v3_1_mode1_reset, + .support_vmr_ring = psp_v3_1_support_vmr_ring, }; void psp_v3_1_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index cca3552b36ed..a10175838013 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -21,8 +21,11 @@ * * Authors: Alex Deucher */ + +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -574,7 +577,7 @@ static int 
sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -870,8 +873,8 @@ static int sdma_v2_4_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1006,7 +1009,7 @@ static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); @@ -1022,7 +1025,7 @@ static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 0ce8331baeb2..5f4e2c616241 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -21,8 +21,11 @@ * * Authors: Alex Deucher */ + +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -846,7 +849,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -1154,8 +1157,8 @@ static int sdma_v3_0_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? 
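The DRM_UDELAY(1) -> udelay(1) conversions above do not change the test's structure: the ring test still writes 0xDEADBEEF through the engine and busy-polls a write-back slot once per microsecond until the value appears or adev->usec_timeout expires. A standalone model of that poll loop; the timeout constant and the error mapping are placeholders for the driver's values:

#include <stdint.h>

#define USEC_TIMEOUT 100000		/* stand-in for adev->usec_timeout */

/* Poll a write-back slot for the magic value the test packet copies in;
 * the kernel loop udelay()s 1us per iteration, modelled here as a plain
 * bounded loop. */
static int wait_for_ring_test(const volatile uint32_t *wb_slot)
{
	unsigned int i;

	for (i = 0; i < USEC_TIMEOUT; i++) {
		if (*wb_slot == 0xDEADBEEF)
			return 0;	/* test packet executed */
	}
	return -1;			/* driver returns -ETIMEDOUT here */
}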
- AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1340,7 +1343,7 @@ static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); @@ -1356,7 +1359,7 @@ static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c816e55d43a9..4ef4d31f5231 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -21,8 +21,11 @@ * */ +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -31,6 +34,18 @@ #include "sdma0/sdma0_4_2_sh_mask.h" #include "sdma1/sdma1_4_2_offset.h" #include "sdma1/sdma1_4_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -41,6 +56,8 @@ #include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h" #include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h" +#include "amdgpu_ras.h" + MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); @@ -50,6 +67,8 @@ MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); MODULE_FIRMWARE("amdgpu/picasso_sdma.bin"); MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin"); +MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -154,7 +173,6 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE000000, 0x00000000), }; static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { @@ -184,7 +202,6 @@ static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE000000, 0x00000000), }; static const struct soc15_reg_golden golden_settings_sdma_rv1[] = @@ -199,11 +216,121 @@ static const struct 
soc15_reg_golden golden_settings_sdma_rv2[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001) }; +static const struct soc15_reg_golden golden_settings_sdma_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002) +}; + +static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) +}; + static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { - return ( 0 == instance ? 
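Each soc15_reg_golden entry above carries an AND mask and an OR value, and programming is a masked read-modify-write of the target register. A simplified standalone model of how one such entry is applied; the struct layout is illustrative, and the real soc15_program_register_sequence() also short-circuits the case of a full 0xffffffff mask:

#include <stdint.h>

struct golden_entry {
	uint32_t and_mask;	/* bits the entry is allowed to touch */
	uint32_t or_value;	/* new value for those bits */
};

/* Masked read-modify-write: keep every bit outside and_mask, replace the
 * bits inside it with or_value. */
static uint32_t apply_golden(uint32_t current, const struct golden_entry *e)
{
	return (current & ~e->and_mask) | (e->or_value & e->and_mask);
}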
(adev->reg_offset[SDMA0_HWIP][0][0] + offset) : - (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); + switch (instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][0] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + +static unsigned sdma_v4_0_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3; + case 4: + return SOC15_IH_CLIENTID_SDMA4; + case 5: + return SOC15_IH_CLIENTID_SDMA5; + case 6: + return SOC15_IH_CLIENTID_SDMA6; + case 7: + return SOC15_IH_CLIENTID_SDMA7; + default: + break; + } + return -EINVAL; +} + +static int sdma_v4_0_irq_id_to_seq(unsigned client_id) +{ + switch (client_id) { + case SOC15_IH_CLIENTID_SDMA0: + return 0; + case SOC15_IH_CLIENTID_SDMA1: + return 1; + case SOC15_IH_CLIENTID_SDMA2: + return 2; + case SOC15_IH_CLIENTID_SDMA3: + return 3; + case SOC15_IH_CLIENTID_SDMA4: + return 4; + case SOC15_IH_CLIENTID_SDMA5: + return 5; + case SOC15_IH_CLIENTID_SDMA6: + return 6; + case SOC15_IH_CLIENTID_SDMA7: + return 7; + default: + break; + } + return -EINVAL; } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) @@ -211,11 +338,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); soc15_program_register_sequence(adev, - golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -236,6 +363,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma1_4_2, ARRAY_SIZE(golden_settings_sdma1_4_2)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_sdma_arct, + ARRAY_SIZE(golden_settings_sdma_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_sdma_4_1, @@ -249,11 +381,53 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_rv1, ARRAY_SIZE(golden_settings_sdma_rv1)); break; + case CHIP_RENOIR: + soc15_program_register_sequence(adev, + golden_settings_sdma_4_3, + ARRAY_SIZE(golden_settings_sdma_4_3)); + break; default: break; } } +static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + const struct sdma_firmware_header_v1_0 *hdr; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +static void sdma_v4_0_destroy_inst_ctx(struct 
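sdma_v4_0_seq_to_irq_id() and sdma_v4_0_irq_id_to_seq() above are inverses of each other over instances 0..7; later hunks rely on that when they register one trap/ECC source per instance and map an incoming IH client id back to an engine index. A small standalone self-check of that round-trip property; the client-id numbers below are placeholders, not the real SOC15_IH_CLIENTID_* values:

#include <assert.h>

/* Placeholder client ids standing in for SOC15_IH_CLIENTID_SDMA0..7. */
static const int sdma_client_id[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };

static int seq_to_irq_id(int seq)
{
	return (seq >= 0 && seq < 8) ? sdma_client_id[seq] : -1;
}

static int irq_id_to_seq(int client_id)
{
	int i;

	for (i = 0; i < 8; i++)
		if (sdma_client_id[i] == client_id)
			return i;
	return -1;
}

int main(void)
{
	int i;

	for (i = 0; i < 8; i++)
		assert(irq_id_to_seq(seq_to_irq_id(i)) == i);
	return 0;
}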
amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.instance[i].fw != NULL) + release_firmware(adev->sdma.instance[i].fw); + + /* arcturus shares the same FW memory across + all SDMA isntances */ + if (adev->asic_type == CHIP_ARCTURUS) + break; + } + + memset((void*)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -273,7 +447,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - const struct sdma_firmware_header_v1_0 *hdr; DRM_DEBUG("\n"); @@ -295,30 +468,52 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; default: BUG(); } - for (i = 0; i < adev->sdma.num_instances; i++) { - if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); - if (err) - goto out; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - if (adev->sdma.instance[i].feature_version >= 20) - adev->sdma.instance[i].burst_nop = true; - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + + err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); + if (err) + goto out; + + for (i = 1; i < adev->sdma.num_instances; i++) { + if (adev->asic_type == CHIP_ARCTURUS) { + /* Acturus will leverage the same FW memory + for every SDMA instance */ + memcpy((void*)&adev->sdma.instance[i], + (void*)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } + else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); + + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); + if (err) + goto out; + } + } + + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
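With the restructuring above, instance 0 always loads "amdgpu/<chip>_sdma.bin"; the remaining instances either reuse instance 0's context (Arcturus, which ships a single SDMA image for all eight engines) or load their own "amdgpu/<chip>_sdma<i>.bin". A standalone sketch of that firmware-name rule exactly as the hunk applies it:

#include <stdio.h>

/* Build the per-instance SDMA firmware name: instance 0 gets
 * "<chip>_sdma.bin", instance N gets "<chip>_sdmaN.bin". Arcturus never
 * reaches the second branch; it copies instance 0's context instead. */
static void sdma_fw_name(char *buf, size_t len, const char *chip, int instance)
{
	if (instance == 0)
		snprintf(buf, len, "amdgpu/%s_sdma.bin", chip);
	else
		snprintf(buf, len, "amdgpu/%s_sdma%d.bin", chip, instance);
}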
"true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + for (i = 0; i < adev->sdma.num_instances; i++) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -327,13 +522,11 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); } } + out: if (err) { DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); } return err; } @@ -554,16 +747,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - if (ring->me == 0) - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; - else - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; sdma_v4_0_wait_reg_mem(ring, 0, 1, - adev->nbio_funcs->get_hdp_flush_done_offset(adev), - adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), ref_and_mask, ref_and_mask, 10); } @@ -615,26 +805,27 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i; + int i, unset = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) + if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = 1; + } - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -658,16 +849,20 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + bool unset = false; for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].page; + + if ((adev->mman.buffer_funcs_ring == sdma[i]) && + (unset == false)) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = true; + } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -676,10 +871,9 @@ 
static void sdma_v4_0_page_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -849,7 +1043,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, - F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl); /* enable DMA RB */ @@ -940,7 +1134,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_PAGE_RB_WPTR_POLL_CNTL, - F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0); WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl); /* enable DMA RB */ @@ -1013,6 +1207,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: sdma_v4_1_init_power_gating(adev); sdma_v4_1_update_power_gating(adev, true); break; @@ -1090,7 +1285,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) static int sdma_v4_0_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - int i, r; + int i, r = 0; if (amdgpu_sriov_vf(adev)) { sdma_v4_0_ctx_switch_enable(adev, false); @@ -1207,7 +1402,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -1468,8 +1663,10 @@ static int sdma_v4_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - if (adev->asic_type == CHIP_RAVEN) + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) adev->sdma.num_instances = 1; + else if (adev->asic_type == CHIP_ARCTURUS) + adev->sdma.num_instances = 8; else adev->sdma.num_instances = 2; @@ -1493,6 +1690,20 @@ static int sdma_v4_0_early_init(void *handle) return 0; } +static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); + +static int sdma_v4_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_ih_if ih_info = { + .cb = sdma_v4_0_process_ras_data_cb, + }; + + return amdgpu_sdma_ras_late_init(adev, &ih_info); +} + static int sdma_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1500,16 +1711,22 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + } - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; + /* SDMA SRAM ECC event */ + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_SRAM_ECC, + &adev->sdma.ecc_irq); + 
if (r) + return r; + } for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; @@ -1523,11 +1740,8 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 1024, - &adev->sdma.trap_irq, - (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; @@ -1545,9 +1759,7 @@ static int sdma_v4_0_sw_init(void *handle) sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, - (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; } @@ -1561,16 +1773,15 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + amdgpu_sdma_ras_fini(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); if (adev->sdma.has_page_queue) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); return 0; } @@ -1580,11 +1791,13 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) || + (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset)) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); - sdma_v4_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + sdma_v4_0_init_golden_registers(adev); r = sdma_v4_0_start(adev); @@ -1594,15 +1807,22 @@ static int sdma_v4_0_hw_init(void *handle) static int sdma_v4_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; if (amdgpu_sriov_vf(adev)) return 0; + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } + sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs - && adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs + && adev->powerplay.pp_funcs->set_powergating_by_smu) || + adev->asic_type == CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); return 0; @@ -1639,15 +1859,17 @@ static bool sdma_v4_0_is_idle(void *handle) static int sdma_v4_0_wait_for_idle(void *handle) { - unsigned i; - u32 sdma0, sdma1; + unsigned i, j; + u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); - sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); - - if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + for (j = 0; j < adev->sdma.num_instances; j++) { + sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG); + if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK)) + break; + } + if (j == adev->sdma.num_instances) return 0; udelay(1); } @@ -1666,13 +1888,12 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, unsigned type, enum 
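sdma_v4_0_wait_for_idle() above now succeeds only when every instance reports IDLE in the same pass: the inner loop breaks out on the first busy engine and the outer loop retries with udelay(1) until adev->usec_timeout. A standalone model of the per-pass check; the IDLE mask value is a placeholder for SDMA0_STATUS_REG__IDLE_MASK:

#include <stdbool.h>
#include <stdint.h>

#define SDMA_STATUS_IDLE_MASK (1u << 0)	/* placeholder bit position */

/* One polling pass: true only if every instance's STATUS_REG has IDLE set. */
static bool all_sdma_idle(const uint32_t *status, int num_instances)
{
	int j;

	for (j = 0; j < num_instances; j++)
		if (!(status[j] & SDMA_STATUS_IDLE_MASK))
			return false;
	return true;
}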
amdgpu_interrupt_state state) { - unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1; u32 sdma_cntl; - sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL); + sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl); + WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl); return 0; } @@ -1684,17 +1905,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, uint32_t instance; DRM_DEBUG("IH: SDMA trap\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: - return 0; - } - + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -1714,6 +1925,29 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + int instance; + + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + goto out; + + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + goto out; + + amdgpu_sdma_process_ras_data_cb(adev, err_data, entry); + +out: + return AMDGPU_RAS_SUCCESS; +} + static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1722,16 +1956,9 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, DRM_ERROR("Illegal instruction in SDMA command stream\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) return 0; - } switch (entry->ring_id) { case 0: @@ -1741,66 +1968,55 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_edc_config; + + sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG); + sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config); + + return 0; +} + static void sdma_v4_0_update_medium_grain_clock_gating( struct amdgpu_device *adev, bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { - /* enable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } else { - /* disable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } } @@ -1811,34 +2027,23 @@ static void sdma_v4_0_update_medium_grain_light_sleep( bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { - /* 1-not override: enable sdma0 mem light sleep */ - def = data = 
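The clock-gating rework above drops the duplicated SDMA0/SDMA1 blocks in favour of one per-instance loop: enabling MGCG clears the SOFT_OVERRIDE bits so the clocks may gate, disabling it sets them to force the clocks on, and the register is written only when the value actually changed. A compact standalone model of that toggle; the override mask is a placeholder for the OR of the SOFT_OVERRIDE0..7 fields:

#include <stdint.h>

#define CLK_CTRL_SOFT_OVERRIDE_MASK 0x000000ffu	/* placeholder field layout */

/* New CLK_CTRL value: clear the override bits to allow clock gating, set
 * them to keep the clocks running. Callers skip the register write when
 * old == new, as the loop above does. */
static uint32_t sdma_clk_ctrl_update(uint32_t val, int enable_mgcg)
{
	if (enable_mgcg)
		return val & ~CLK_CTRL_SOFT_OVERRIDE_MASK;
	return val | CLK_CTRL_SOFT_OVERRIDE_MASK;
}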
RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); - data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - - /* 1-not override: enable sdma1 mem light sleep */ - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + /* 1-not override: enable sdma mem light sleep */ + def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data); } } else { - /* 0-override:disable sdma0 mem light sleep */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); - data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - - /* 0-override:disable sdma1 mem light sleep */ - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + /* 0-override:disable sdma mem light sleep */ + def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data); } } } @@ -1856,6 +2061,8 @@ static int sdma_v4_0_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); sdma_v4_0_update_medium_grain_light_sleep(adev, @@ -1906,7 +2113,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, - .late_init = NULL, + .late_init = sdma_v4_0_late_init, .sw_init = sdma_v4_0_sw_init, .sw_fini = sdma_v4_0_sw_fini, .hw_init = sdma_v4_0_hw_init, @@ -1926,7 +2133,43 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_ring_get_wptr, + .set_wptr = sdma_v4_0_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/* + * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1). + * So create a individual constant ring_funcs for those instances. 
+ */ +static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -1958,7 +2201,39 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -1990,10 +2265,20 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } } @@ -2008,11 +2293,33 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { .process = sdma_v4_0_process_illegal_inst_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = { + .set = sdma_v4_0_set_ecc_irq_state, + .process = amdgpu_sdma_process_ecc_irq, +}; + + + static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + switch (adev->sdma.num_instances) { + case 1: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + break; + case 8: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + break; + case 2: + default: + 
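sdma_v4_0_set_ring_funcs() above encodes the Arcturus topology in a single predicate: instances 0-4 translate through MMHUB 0, instances 5-7 through MMHUB 1, and both the gfx and page rings of an instance pick the matching funcs table. A standalone sketch of that selection rule; the chip identifiers are placeholders for adev->asic_type values:

#include <stdbool.h>

enum asic { ASIC_OTHER, ASIC_ARCTURUS };	/* placeholders for CHIP_* */

/* True when an SDMA instance sits behind the second MMHUB and therefore
 * needs the *_2nd_mmhub ring funcs; only Arcturus instances 5..7 qualify. */
static bool sdma_uses_second_mmhub(enum asic chip, int instance)
{
	return chip == ASIC_ARCTURUS && instance >= 5;
}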
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + break; + } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; + adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c new file mode 100644 index 000000000000..f4ad2990f973 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -0,0 +1,1744 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_trace.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "hdp/hdp_5_0_0_offset.h" +#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h" +#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h" + +#include "soc15_common.h" +#include "soc15.h" +#include "navi10_sdma_pkt_open.h" +#include "nbio_v2_3.h" +#include "sdma_v5_0.h" + +MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/navi14_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); + +#define SDMA1_REG_OFFSET 0x600 +#define SDMA0_HYP_DEC_REG_START 0x5880 +#define SDMA0_HYP_DEC_REG_END 0x5893 +#define SDMA1_HYP_DEC_REG_OFFSET 0x20 + +static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); +static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); +static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); +static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); + +static const struct soc15_reg_golden golden_settings_sdma_5[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x00ffffff, 0x000c5c00), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00) +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +}; + +static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) +{ + u32 base; + + if (internal_offset >= SDMA0_HYP_DEC_REG_START && + internal_offset <= SDMA0_HYP_DEC_REG_END) { + base = adev->reg_offset[GC_HWIP][0][1]; + if (instance == 1) + internal_offset += SDMA1_HYP_DEC_REG_OFFSET; + } else { + base = adev->reg_offset[GC_HWIP][0][0]; + if (instance == 1) + internal_offset += SDMA1_REG_OFFSET; + } + + return base + internal_offset; +} + +static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv10, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv10)); + break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv14, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const 
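sdma_v5_0_get_reg_offset() above has to handle two register windows: ordinary SDMA registers live in the GC aperture with a 0x600 stride between instances 0 and 1, while registers in the hypervisor-decode range 0x5880-0x5893 use a different base segment and only a 0x20 per-instance offset. A standalone model of that address math; the two base values below are arbitrary placeholders for adev->reg_offset[GC_HWIP][0][1] and [0][0]:

#include <stdint.h>

#define SDMA1_REG_OFFSET		0x600
#define SDMA0_HYP_DEC_REG_START		0x5880
#define SDMA0_HYP_DEC_REG_END		0x5893
#define SDMA1_HYP_DEC_REG_OFFSET	0x20

#define GC_BASE_SEG0	0x1000u		/* placeholder base, normal window */
#define GC_BASE_SEG1	0x8000u		/* placeholder base, HYP_DEC window */

static uint32_t sdma_v5_reg_offset(uint32_t instance, uint32_t internal_offset)
{
	uint32_t base;

	if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
	    internal_offset <= SDMA0_HYP_DEC_REG_END) {
		base = GC_BASE_SEG1;
		if (instance == 1)
			internal_offset += SDMA1_HYP_DEC_REG_OFFSET;
	} else {
		base = GC_BASE_SEG0;
		if (instance == 1)
			internal_offset += SDMA1_REG_OFFSET;
	}
	return base + internal_offset;
}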
u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv12, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); + break; + default: + break; + } +} + +/** + * sdma_v5_0_init_microcode - load ucode images from disk + * + * @adev: amdgpu_device pointer + * + * Use the firmware interface to load the ucode images into + * the driver (not loaded into hw). + * Returns 0 on success, error on failure. + */ + +// emulation only, won't work on real chip +// navi10 real chip need to use PSP to load firmware +static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err = 0, i; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_NAVI10: + chip_name = "navi10"; + break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; + default: + BUG(); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (i == 0) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); + if (err) + goto out; + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma.instance[i].feature_version >= 20) + adev->sdma.instance[i].burst_nop = true; + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + } +out: + if (err) { + DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name); + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } + } + return err; +} + +static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 1); + ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ + amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + + return ret; +} + +static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned offset) +{ + unsigned cur; + + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur > offset) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; +} + +/** + * sdma_v5_0_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (NAVI10+). 
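sdma_v5_0_ring_init_cond_exec() and sdma_v5_0_ring_patch_cond_exec() above reserve a dword (filled with 0x55aa55aa) whose final value is the length, in dwords, of the command span covered by the COND_EXE packet; the patch step measures the distance from the reserved slot to the current write pointer, wrapping around the ring if needed. A standalone model of that distance calculation, mirroring the two branches in the hunk:

#include <stdint.h>

/* Dword count written over the 0x55aa55aa placeholder: 'cur' is the last
 * written slot ((wptr - 1) & buf_mask), 'offset' is the reserved slot, and
 * buf_mask + 1 is the ring length in dwords. */
static uint32_t cond_exec_patch_count(uint32_t cur, uint32_t offset,
				      uint32_t buf_mask)
{
	if (cur > offset)
		return cur - offset;
	return (buf_mask + 1) - offset + cur;
}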
+ */ +static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + u64 *rptr; + + /* XXX check if swapping is necessary on BE */ + rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); + + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); + return ((*rptr) >> 2); +} + +/** + * sdma_v5_0_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (NAVI10+). + */ +static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 *wptr = NULL; + uint64_t local_wptr = 0; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); + *wptr = (*wptr) >> 2; + DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + } else { + u32 lowbit, highbit; + + wptr = &local_wptr; + lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + + DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", + ring->me, highbit, lowbit); + *wptr = highbit; + *wptr = (*wptr) << 32; + *wptr |= lowbit; + } + + return *wptr; +} + +/** + * sdma_v5_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (NAVI10+). + */ +static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + DRM_DEBUG("Setting write pointer\n"); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); + adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } +} + +static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->funcs->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); +} + +/** + * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine + * + * @ring: amdgpu ring pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (NAVI10). 
+ */ +static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); + + /* IB packet must end on a 8 DW boundary */ + sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); +} + +/** + * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring + * + * @ring: amdgpu ring pointer + * + * Emit an hdp flush packet on the requested DMA ring. + */ +static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask = 0; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + + if (ring->me == 0) + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; + else + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ +} + +/** + * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring + * + * @ring: amdgpu ring pointer + * @fence: amdgpu fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (NAVI10). + */ +static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + /* write the fence */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + /* optionally write high bits as well */ + if (write64bit) { + addr += 4; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } + + /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ + if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + /* generate an interrupt */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); + } +} + + +/** + * sdma_v5_0_gfx_stop - stop the gfx async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the gfx async dma ring buffers (NAVI10). + */ +static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1)) + amdgpu_ttm_set_buffer_funcs_status(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + } + + sdma0->sched.ready = false; + sdma1->sched.ready = false; +} + +/** + * sdma_v5_0_rlc_stop - stop the compute async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the compute async dma queues (NAVI10). + */ +static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev) +{ + /* XXX todo */ +} + +/** + * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (NAVI10). + */ +static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl, phase_quantum = 0; + int i; + + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 1 : 0); + if (enable && amdgpu_sdma_phase_quantum) { + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + } + +} + +/** + * sdma_v5_0_enable - stop the async dma engines + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines (NAVI10). 
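The phase-quantum handling in sdma_v5_0_ctx_switch_enable() above packs the requested number of clock cycles into a (value, unit) pair whose effective quantum is value << unit, clamping oversized requests to the field maxima with a one-time warning. A standalone model of that encoding; the field widths below are assumptions standing in for the SDMA0_PHASE0_QUANTUM VALUE/UNIT masks:

#include <stdint.h>

#define PHASE_QUANTUM_VALUE_MAX 0x1fffu	/* assumed VALUE field maximum */
#define PHASE_QUANTUM_UNIT_MAX  0xfu	/* assumed UNIT field maximum */

/* Split 'cycles' into value/unit so that value << unit ~= cycles, clamping
 * to the register field maxima the same way the loop above does. */
static void encode_phase_quantum(unsigned int cycles,
				 unsigned int *value, unsigned int *unit)
{
	*unit = 0;
	while (cycles > PHASE_QUANTUM_VALUE_MAX) {
		cycles = (cycles + 1) >> 1;
		(*unit)++;
	}
	if (*unit > PHASE_QUANTUM_UNIT_MAX) {
		cycles = PHASE_QUANTUM_VALUE_MAX;
		*unit = PHASE_QUANTUM_UNIT_MAX;
	}
	*value = cycles;
}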
+ */ +static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + if (enable == false) { + sdma_v5_0_gfx_stop(adev); + sdma_v5_0_rlc_stop(adev); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + } +} + +/** + * sdma_v5_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u32 temp; + u32 wptr_poll_cntl; + u64 wptr_gpu_addr; + int i, r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + wb_offset = (ring->rptr_offs * 4); + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + wptr_poll_cntl); + + /* set the wb address whether it's enabled or not */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 
lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } + + doorbell = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); + } + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, 20); + + if (amdgpu_sriov_vf(adev)) + sdma_v5_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + + /* enable MCBP */ + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + + ring->sched.ready = true; + + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v5_0_ctx_switch_enable(adev, true); + sdma_v5_0_enable(adev, true); + } + + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + return r; + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + + return 0; +} + +/** + * sdma_v5_0_rlc_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the compute DMA queues and enable them (NAVI10). + * Returns 0 for success, error for failure. 
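The UTCL1_PAGE write above masks the register with 0xFF0FFF, which clears (among others) the cache-policy field at bits 15:12, and then ORs in the defaults defined in the sdma_v5_0.h hunk further down in this patch: read policy NOA (2) at bit 12 and write policy BYPASS (3) at bit 14, so the field ends up as 0xE000. A quick host-side check of that arithmetic, with the mask and enum values copied from the patch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Enum values from the sdma_v5_0.h hunk later in this patch. */
#define CACHE_READ_POLICY_L2__DEFAULT  0x2   /* NOA */
#define CACHE_WRITE_POLICY_L2__DEFAULT 0x3   /* BYPASS */

int main(void)
{
    uint32_t temp = 0xffffffffu;   /* pretend every bit was set beforehand */

    temp &= 0xFF0FFF;              /* same mask the driver applies */
    temp |= (CACHE_READ_POLICY_L2__DEFAULT << 12) |
            (CACHE_WRITE_POLICY_L2__DEFAULT << 14);

    printf("policy bits [15:12] = 0x%x\n", (temp >> 12) & 0xf);
    assert(((temp >> 12) & 0xf) == 0xe);   /* 0b1110: BYPASS write, NOA read */
    return 0;
}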
+ */ +static int sdma_v5_0_rlc_resume(struct amdgpu_device *adev) +{ + return 0; +} + +/** + * sdma_v5_0_load_microcode - load the sDMA ME ucode + * + * @adev: amdgpu_device pointer + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int sdma_v5_0_load_microcode(struct amdgpu_device *adev) +{ + const struct sdma_firmware_header_v1_0 *hdr; + const __le32 *fw_data; + u32 fw_size; + int i, j; + + /* halt the MEs */ + sdma_v5_0_enable(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (!adev->sdma.instance[i].fw) + return -EINVAL; + + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + amdgpu_ucode_print_sdma_hdr(&hdr->header); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + fw_data = (const __le32 *) + (adev->sdma.instance[i].fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); + + for (j = 0; j < fw_size; j++) { + if (amdgpu_emu_mode == 1 && j % 500 == 0) + msleep(1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + } + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + } + + return 0; +} + +/** + * sdma_v5_0_start - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the DMA engines and enable them (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_start(struct amdgpu_device *adev) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + sdma_v5_0_ctx_switch_enable(adev, false); + sdma_v5_0_enable(adev, false); + + /* set RB registers */ + r = sdma_v5_0_gfx_resume(adev); + return r; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = sdma_v5_0_load_microcode(adev); + if (r) + return r; + + /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ + if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d) + msleep(1000); + } + + /* unhalt the MEs */ + sdma_v5_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v5_0_ctx_switch_enable(adev, true); + + /* start the gfx rings and rlc compute queues */ + r = sdma_v5_0_gfx_resume(adev); + if (r) + return r; + r = sdma_v5_0_rlc_resume(adev); + + return r; +} + +/** + * sdma_v5_0_ring_test_ring - simple async dma engine test + * + * @ring: amdgpu_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (NAVI10). + * Returns 0 for success, error for failure. 
+ */ +static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + unsigned i; + unsigned index; + int r; + u32 tmp; + u64 gpu_addr; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ring_alloc(ring, 5); + if (r) { + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + amdgpu_device_wb_free(adev, index); + return r; + } + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + + if (i < adev->usec_timeout) { + if (amdgpu_emu_mode == 1) + DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i); + else + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + amdgpu_device_wb_free(adev, index); + + return r; +} + +/** + * sdma_v5_0_ring_test_ib - test an IB on the DMA engine + * + * @ring: amdgpu_ring structure holding ring information + * + * Test a simple IB in the DMA ring (NAVI10). + * Returns 0 on success, error on failure. + */ +static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + long r; + u32 tmp = 0; + u64 gpu_addr; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; + } + + ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib.ptr[1] = lower_32_bits(gpu_addr); + ib.ptr[2] = upper_32_bits(gpu_addr); + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + ib.ptr[4] = 0xDEADBEEF; + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err1; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + +err1: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err0: + amdgpu_device_wb_free(adev, index); + return r; +} + + +/** + * sdma_v5_0_vm_copy_pte - update PTEs by copying them from the GART + * + * @ib: indirect buffer to 
fill with commands + * @pe: addr of the page entry + * @src: src addr to copy from + * @count: number of page entries to update + * + * Update PTEs by copying them from the GART using sDMA (NAVI10). + */ +static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count) +{ + unsigned bytes = count * 8; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + +} + +/** + * sdma_v5_0_vm_write_pte - update PTEs by writing them manually + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update PTEs by writing them manually using sDMA (NAVI10). + */ +static void sdma_v5_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr) +{ + unsigned ndw = count * 2; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw - 1; + for (; ndw > 0; ndw -= 2) { + ib->ptr[ib->length_dw++] = lower_32_bits(value); + ib->ptr[ib->length_dw++] = upper_32_bits(value); + value += incr; + } +} + +/** + * sdma_v5_0_vm_set_pte_pde - update the page tables using sDMA + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA (NAVI10). 
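The three PTE helpers differ mainly in how many ring dwords they cost per update: the copy path above is a fixed 7-dword packet no matter how many entries it moves, the manual write path above spends 4 dwords of header plus 2 per entry, and the set_pte_pde packet emitted just below is a fixed 10 dwords for a whole run of physically contiguous entries. A throwaway calculator for comparing them, with the dword counts read straight off the packet builders in this patch:

#include <stdio.h>

/* Dword cost per method, taken from the sdma_v5_0 packet builders. */
static unsigned int copy_pte_dw(unsigned int count)    { (void)count; return 7; }
static unsigned int write_pte_dw(unsigned int count)   { return 4 + 2 * count; }
static unsigned int set_pte_pde_dw(unsigned int count) { (void)count; return 10; }

int main(void)
{
    unsigned int counts[] = { 1, 8, 64, 512 };
    unsigned int i;

    for (i = 0; i < sizeof(counts) / sizeof(counts[0]); i++)
        printf("%4u PTEs: copy=%u write=%u set_pte_pde=%u dwords\n",
               counts[i], copy_pte_dw(counts[i]),
               write_pte_dw(counts[i]), set_pte_pde_dw(counts[i]));
    return 0;
}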
+ */ +static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ +} + +/** + * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw + * + * @ib: indirect buffer to fill with padding + * + */ +static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); +} + + +/** + * sdma_v5_0_ring_emit_pipeline_sync - sync the pipeline + * + * @ring: amdgpu_ring pointer + * + * Make sure all previous operations are completed (CIK). + */ +static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + /* wait for idle */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); /* reference */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ +} + + +/** + * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA + * + * @ring: amdgpu_ring pointer + * @vm: amdgpu_vm pointer + * + * Update the page table base and flush the VM TLB + * using sDMA (NAVI10). 
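sdma_v5_0_ring_pad_ib() above pads the IB out to the next 8-dword boundary; when the instance supports burst NOPs, the first padding dword carries a COUNT of pad_count - 1 so the engine swallows the remaining pad dwords in a single packet. A stand-alone check of the pad arithmetic:

#include <assert.h>
#include <stdio.h>

/* Same expression the driver uses to round an IB up to 8 dwords. */
static unsigned int pad_dwords(unsigned int length_dw)
{
    return (8 - (length_dw & 0x7)) % 8;
}

int main(void)
{
    unsigned int len;

    for (len = 0; len < 16; len++) {
        unsigned int pad = pad_dwords(len);

        printf("length_dw=%2u -> pad %u -> total %2u\n", len, pad, len + pad);
        assert((len + pad) % 8 == 0);   /* always ends on an 8-DW boundary */
    }
    return 0;
}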
+ */ +static void sdma_v5_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static void sdma_v5_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + +static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); +} + +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + +static int sdma_v5_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->sdma.num_instances = 2; + + sdma_v5_0_set_ring_funcs(adev); + sdma_v5_0_set_buffer_funcs(adev); + sdma_v5_0_set_vm_pte_funcs(adev); + sdma_v5_0_set_irq_funcs(adev); + + return 0; +} + + +static int sdma_v5_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, + SDMA0_5_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, + SDMA1_5_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + + r = sdma_v5_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load sdma firmware!\n"); + return r; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + ring->ring_obj = NULL; + ring->use_doorbell = true; + + DRM_INFO("use_doorbell being set to: [%s]\n", + ring->use_doorbell?"true":"false"); + + ring->doorbell_index = (i == 0) ? + (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset + : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset + + sprintf(ring->name, "sdma%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + (i == 0) ? 
+ AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); + if (r) + return r; + } + + return r; +} + +static int sdma_v5_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + + return 0; +} + +static int sdma_v5_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + sdma_v5_0_init_golden_registers(adev); + + r = sdma_v5_0_start(adev); + + return r; +} + +static int sdma_v5_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + sdma_v5_0_ctx_switch_enable(adev, false); + sdma_v5_0_enable(adev, false); + + return 0; +} + +static int sdma_v5_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v5_0_hw_fini(adev); +} + +static int sdma_v5_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v5_0_hw_init(adev); +} + +static bool sdma_v5_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + u32 tmp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); + + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) + return false; + } + + return true; +} + +static int sdma_v5_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 sdma0, sdma1; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); + + if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int sdma_v5_0_soft_reset(void *handle) +{ + /* todo */ + + return 0; +} + +static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + u64 sdma_gfx_preempt; + + amdgpu_sdma_get_index_from_ring(ring, &index); + if (index == 0) + sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT; + else + sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT; + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* emit the trailing fence */ + ring->trail_seq += 1; + amdgpu_ring_alloc(ring, 10); + sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, 0); + amdgpu_ring_commit(ring); + + /* assert IB preemption */ + WREG32(sdma_gfx_preempt, 1); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to be preempted\n", ring->idx); + } + + /* deassert IB preemption */ + WREG32(sdma_gfx_preempt, 0); + + /* deassert the preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_cntl; + + u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? 
+ sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : + sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); + + sdma_cntl = RREG32(reg_offset); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_cntl); + + return 0; +} + +static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: SDMA trap\n"); + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[0].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + case SOC15_IH_CLIENTID_SDMA1: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[1].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + } + return 0; +} + +static int sdma_v5_0_process_illegal_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + return 0; +} + +static void sdma_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { + /* Enable sdma clock gating */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); + } else { + /* Disable sdma clock gating */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); + } + } +} + +static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { + /* Enable sdma mem light sleep */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data); + + } else { + /* Disable sdma mem light sleep */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data); + + } + } +} + +static int sdma_v5_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch 
(adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + sdma_v5_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + sdma_v5_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +static int sdma_v5_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_SDMA_MGCG */ + data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL)); + if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK)) + *flags |= AMD_CG_SUPPORT_SDMA_MGCG; + + /* AMD_CG_SUPPORT_SDMA_LS */ + data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL)); + if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) + *flags |= AMD_CG_SUPPORT_SDMA_LS; +} + +const struct amd_ip_funcs sdma_v5_0_ip_funcs = { + .name = "sdma_v5_0", + .early_init = sdma_v5_0_early_init, + .late_init = NULL, + .sw_init = sdma_v5_0_sw_init, + .sw_fini = sdma_v5_0_sw_fini, + .hw_init = sdma_v5_0_hw_init, + .hw_fini = sdma_v5_0_hw_fini, + .suspend = sdma_v5_0_suspend, + .resume = sdma_v5_0_resume, + .is_idle = sdma_v5_0_is_idle, + .wait_for_idle = sdma_v5_0_wait_for_idle, + .soft_reset = sdma_v5_0_soft_reset, + .set_clockgating_state = sdma_v5_0_set_clockgating_state, + .set_powergating_state = sdma_v5_0_set_powergating_state, + .get_clockgating_state = sdma_v5_0_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = sdma_v5_0_ring_get_rptr, + .get_wptr = sdma_v5_0_ring_get_wptr, + .set_wptr = sdma_v5_0_ring_set_wptr, + .emit_frame_size = + 5 + /* sdma_v5_0_ring_init_cond_exec */ + 6 + /* sdma_v5_0_ring_emit_hdp_flush */ + 3 + /* hdp_invalidate */ + 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ + /* sdma_v5_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ + .emit_ib = sdma_v5_0_ring_emit_ib, + .emit_fence = sdma_v5_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush, + .test_ring = sdma_v5_0_ring_test_ring, + .test_ib = sdma_v5_0_ring_test_ib, + .insert_nop = sdma_v5_0_ring_insert_nop, + .pad_ib = sdma_v5_0_ring_pad_ib, + .emit_wreg = sdma_v5_0_ring_emit_wreg, + .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, + .init_cond_exec = sdma_v5_0_ring_init_cond_exec, + .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, + .preempt_ib = sdma_v5_0_ring_preempt_ib, +}; + +static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].ring.funcs = &sdma_v5_0_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } +} + +static const struct amdgpu_irq_src_funcs sdma_v5_0_trap_irq_funcs = { + .set = sdma_v5_0_set_trap_irq_state, + .process = sdma_v5_0_process_trap_irq, +}; + +static const struct 
amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = { + .process = sdma_v5_0_process_illegal_inst_irq, +}; + +static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + + adev->sdma.num_instances; + adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs; + adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs; +} + +/** + * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Copy GPU buffers using the DMA engine (NAVI10). + * Used by the amdgpu ttm implementation to move pages if + * registered as the asic copy callback. + */ +static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, + uint64_t src_offset, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); +} + +/** + * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_data: value to write to buffer + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Fill GPU buffers using the DMA engine (NAVI10). + */ +static void sdma_v5_0_emit_fill_buffer(struct amdgpu_ib *ib, + uint32_t src_data, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count - 1; +} + +static const struct amdgpu_buffer_funcs sdma_v5_0_buffer_funcs = { + .copy_max_bytes = 0x400000, + .copy_num_dw = 7, + .emit_copy_buffer = sdma_v5_0_emit_copy_buffer, + + .fill_max_bytes = 0x400000, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v5_0_emit_fill_buffer, +}; + +static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev) +{ + if (adev->mman.buffer_funcs == NULL) { + adev->mman.buffer_funcs = &sdma_v5_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; + } +} + +static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v5_0_vm_copy_pte, + .write_pte = sdma_v5_0_vm_write_pte, + .set_pte_pde = sdma_v5_0_vm_set_pte_pde, +}; + +static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev) +{ + struct drm_gpu_scheduler *sched; + unsigned i; + + if (adev->vm_manager.vm_pte_funcs == NULL) { + adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + } +} + +const struct amdgpu_ip_block_version sdma_v5_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &sdma_v5_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h new file mode 100644 index 000000000000..d5a94e3d181c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -0,0 +1,45 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SDMA_V5_0_H__ +#define __SDMA_V5_0_H__ + +enum sdma_v5_0_utcl2_cache_read_policy { + CACHE_READ_POLICY_L2__LRU = 0x00000000, + CACHE_READ_POLICY_L2__STREAM = 0x00000001, + CACHE_READ_POLICY_L2__NOA = 0x00000002, + CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA, +}; + +enum sdma_v5_0_utcl2_cache_write_policy { + CACHE_WRITE_POLICY_L2__LRU = 0x00000000, + CACHE_WRITE_POLICY_L2__STREAM = 0x00000001, + CACHE_WRITE_POLICY_L2__NOA = 0x00000002, + CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003, + CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS, +}; + +extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; +extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; + +#endif /* __SDMA_V5_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d8df68893b9..29024e64c886 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -24,7 +24,8 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -974,6 +975,17 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {GRBM_STATUS}, + {mmGRBM_STATUS2}, + {mmGRBM_STATUS_SE0}, + {mmGRBM_STATUS_SE1}, + {mmSRBM_STATUS}, + {mmSRBM_STATUS2}, + {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmCP_STAT}, + {mmCP_STALLED_STAT1}, + {mmCP_STALLED_STAT2}, + {mmCP_STALLED_STAT3}, {GB_ADDR_CONFIG}, {MC_ARB_RAMCFG}, {GB_TILE_MODE0}, @@ -1185,6 +1197,12 @@ static int si_asic_reset(struct amdgpu_device *adev) return 0; } +static enum amd_reset_method +si_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 si_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1339,8 +1357,8 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, /* This reports 0 on APUs, so return to avoid writing/reading registers * that may or may not be different from their GPU counterparts */ - if (adev->flags & 
AMD_IS_APU) - return; + if (adev->flags & AMD_IS_APU) + return; /* Set the 2 events that we wish to watch, defined above */ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ @@ -1375,12 +1393,25 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, .read_bios_from_rom = &si_read_bios_from_rom, .read_register = &si_read_register, .reset = &si_asic_reset, + .reset_method = &si_asic_reset_method, .set_vga_state = &si_vga_set_state, .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, @@ -1393,6 +1424,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .need_full_reset = &si_need_full_reset, .get_pcie_usage = &si_get_pcie_usage, .need_reset_on_init = &si_need_reset_on_init, + .get_pcie_replay_count = &si_get_pcie_replay_count, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) @@ -1867,7 +1899,7 @@ static void si_program_aspm(struct amdgpu_device *adev) if (orig != data) si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); - if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) { + if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); data &= ~PLL_RAMP_UP_TIME_0_MASK; if (orig != data) @@ -1916,14 +1948,14 @@ static void si_program_aspm(struct amdgpu_device *adev) orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); data &= ~LS2_EXIT_TIME_MASK; - if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) + if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) data |= LS2_EXIT_TIME(5); if (orig != data) si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); data &= ~LS2_EXIT_TIME_MASK; - if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) + if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) data |= LS2_EXIT_TIME(5); if (orig != data) si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index f15f196684ba..bdda8b4e03f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -21,7 +21,7 @@ * * Authors: Alex Deucher */ -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_trace.h" #include "si.h" @@ -230,7 +230,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -503,8 +503,8 @@ static int si_dma_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? 
- AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -591,7 +591,7 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); @@ -607,7 +607,7 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 41e01a7f57a4..4cb4c891120b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -21,7 +21,9 @@ * */ -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_dpm.h" @@ -4098,14 +4100,13 @@ static int si_notify_smc_display_change(struct amdgpu_device *adev, static void si_program_response_times(struct amdgpu_device *adev) { - u32 voltage_response_time, backbias_response_time, acpi_delay_time, vbi_time_out; + u32 voltage_response_time, acpi_delay_time, vbi_time_out; u32 vddc_dly, acpi_dly, vbi_dly; u32 reference_clock; si_write_smc_soft_register(adev, SI_SMC_SOFT_REGISTER_mvdd_chg_time, 1); voltage_response_time = (u32)adev->pm.dpm.voltage_response_time; - backbias_response_time = (u32)adev->pm.dpm.backbias_response_time; if (voltage_response_time == 0) voltage_response_time = 1000; diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 8c50c9cab455..88ae27a5a03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "sid.h" @@ -62,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) u32 interrupt_cntl, ih_cntl, ih_rb_cntl; si_ih_disable_interrupts(adev); - WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c index 4a2fd8b61940..8f994ffa9cd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "sid.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c new file mode 100644 index 000000000000..c44723c267c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -0,0 +1,732 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +#include "smu_v11_0_i2c.h" +#include "amdgpu.h" +#include "soc15_common.h" +#include <drm/drm_fixed.h> +#include <drm/drm_drv.h> +#include "amdgpu_amdkfd.h" +#include <linux/i2c.h> +#include <linux/pci.h> +#include "amdgpu_ras.h" + +/* error codes */ +#define I2C_OK 0 +#define I2C_NAK_7B_ADDR_NOACK 1 +#define I2C_NAK_TXDATA_NOACK 2 +#define I2C_TIMEOUT 4 +#define I2C_SW_TIMEOUT 8 +#define I2C_ABORT 0x10 + +/* I2C transaction flags */ +#define I2C_NO_STOP 1 +#define I2C_RESTART 2 + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev +#define to_eeprom_control(x) container_of(x, struct amdgpu_ras_eeprom_control, eeprom_accessor) + +static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT); + + reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0); + WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); +} + + +static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); +} + +static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + /* do */ + { + RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR); + + } /* while (reg_CKSVII2C_ic_clr_intr == 0) */ +} + +static void smu_v11_0_i2c_configure(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = 0; + + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0); + /* Standard mode */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg); +} + +static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* + * Standard mode speed, These values are taken from SMUIO MAS, + * but are different from what is given is + * Synopsys spec. 
The values here are based on assumption + * that refclock is 100MHz + * + * Configuration for standard mode; Speed = 100kbps + * Scale linearly, for now only support standard speed clock + * This will work only with 100M ref clock + * + * TBD:Change the calculation to take into account ref clock values also. + */ + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20); +} + +static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Convert fromr 8-bit to 7-bit address */ + address >>= 1; + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF)); +} + +static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t ret = I2C_OK; + uint32_t reg, reg_c_tx_abrt_source; + + /*Check if transmission is completed */ + unsigned long timeout_counter = jiffies + msecs_to_jiffies(20); + + do { + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + break; + } + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0); + + if (ret != I2C_OK) + return ret; + + /* This only checks if NAK is received and transaction got aborted */ + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT); + + if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) { + reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source); + + /* Check for stop due to NACK */ + if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_TXDATA_NOACK) == 1) { + + ret |= I2C_NAK_TXDATA_NOACK; + + } else if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_7B_ADDR_NOACK) == 1) { + + ret |= I2C_NAK_7B_ADDR_NOACK; + } else { + ret |= I2C_ABORT; + } + + smu_v11_0_i2c_clear_status(control); + } + + return ret; +} + +static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t ret = I2C_OK; + uint32_t reg_ic_status, reg_c_tx_abrt_source; + + reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + + /* If slave is not present */ + if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_7B_ADDR_NOACK) == 1) { + ret |= I2C_NAK_7B_ADDR_NOACK; + + smu_v11_0_i2c_clear_status(control); + } else { /* wait till some data is there in RXFIFO */ + /* Poll for some byte in RXFIFO */ + unsigned long timeout_counter = jiffies + msecs_to_jiffies(20); + + do { + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + break; + } + + reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0); + } + + return ret; +} + + + + +/** + * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. + * + * @address: The I2C address of the slave device. + * @data: The data to transmit over the bus. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. 
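smu_v11_0_i2c_set_address() above expects the 8-bit, left-shifted address convention (the self-test at the bottom of this file passes 0xA0, the usual EEPROM write address) and shifts off the R/W bit to get the 7-bit value programmed into the controller's TAR register. The conversion in isolation:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t eight_bit = 0xA0;            /* I2C_TARGET_ADDR used by the self-test */
    uint8_t seven_bit = eight_bit >> 1;  /* drop the R/W bit */

    printf("0x%02X -> 0x%02X\n", eight_bit, seven_bit);
    assert(seven_bit == 0x50);           /* typical 24xx-series EEPROM address */
    return 0;
}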
+ */ +static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, + uint8_t address, uint8_t *data, + uint32_t numbytes, uint32_t i2c_flag) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t bytes_sent, reg, ret = 0; + unsigned long timeout_counter; + + bytes_sent = 0; + + DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", + (uint16_t)address, numbytes); + + if (drm_debug & DRM_UT_DRIVER) { + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, + 16, 1, data, numbytes, false); + } + + /* Set the I2C slave address */ + smu_v11_0_i2c_set_address(control, address); + /* Enable I2C */ + smu_v11_0_i2c_enable(control, true); + + /* Clear status bits */ + smu_v11_0_i2c_clear_status(control); + + + timeout_counter = jiffies + msecs_to_jiffies(20); + + while (numbytes > 0) { + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { + do { + reg = 0; + /* + * Prepare transaction, no need to set RESTART. I2C engine will send + * START as soon as it sees data in TXFIFO + */ + if (bytes_sent == 0) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, + (i2c_flag & I2C_RESTART) ? 1 : 0); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]); + + /* determine if we need to send STOP bit or not */ + if (numbytes == 1) + /* Final transaction, so send stop unless I2C_NO_STOP */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, + (i2c_flag & I2C_NO_STOP) ? 0 : 1); + /* Write */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + /* Record that the bytes were transmitted */ + bytes_sent++; + numbytes--; + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)); + } + + /* + * We waited too long for the transmission FIFO to become not-full. + * Exit the loop with error. + */ + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + goto Err; + } + } + + ret = smu_v11_0_i2c_poll_tx_status(control); + +Err: + /* Any error, no point in proceeding */ + if (ret != I2C_OK) { + if (ret & I2C_SW_TIMEOUT) + DRM_ERROR("TIMEOUT ERROR !!!"); + + if (ret & I2C_NAK_7B_ADDR_NOACK) + DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + + if (ret & I2C_NAK_TXDATA_NOACK) + DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + } + + return ret; +} + + +/** + * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device. + * + * @address: The I2C address of the slave device. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. + */ +static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, + uint8_t address, uint8_t *data, + uint32_t numbytes, uint8_t i2c_flag) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t bytes_received, ret = I2C_OK; + + bytes_received = 0; + + /* Set the I2C slave address */ + smu_v11_0_i2c_set_address(control, address); + + /* Enable I2C */ + smu_v11_0_i2c_enable(control, true); + + while (numbytes > 0) { + uint32_t reg = 0; + + smu_v11_0_i2c_clear_status(control); + + + /* Prepare transaction */ + + /* Each time we disable I2C, so this is not a restart */ + if (bytes_received == 0) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, + (i2c_flag & I2C_RESTART) ? 
1 : 0); + + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0); + /* Read */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1); + + /* Transmitting last byte */ + if (numbytes == 1) + /* Final transaction, so send stop if requested */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, + (i2c_flag & I2C_NO_STOP) ? 0 : 1); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + ret = smu_v11_0_i2c_poll_rx_status(control); + + /* Any error, no point in proceeding */ + if (ret != I2C_OK) { + if (ret & I2C_SW_TIMEOUT) + DRM_ERROR("TIMEOUT ERROR !!!"); + + if (ret & I2C_NAK_7B_ADDR_NOACK) + DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + if (ret & I2C_NAK_TXDATA_NOACK) + DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + + break; + } + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD); + data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT); + + /* Record that the bytes were received */ + bytes_received++; + numbytes--; + } + + DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :", + (uint16_t)address, bytes_received); + + if (drm_debug & DRM_UT_DRIVER) { + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, + 16, 1, data, bytes_received, false); + } + + return ret; +} + +static void smu_v11_0_i2c_abort(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = 0; + + /* Enable I2C engine; */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + + /* Abort previous transaction */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + + DRM_DEBUG_DRIVER("I2C_Abort() Done."); +} + + +static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + const uint32_t IDLE_TIMEOUT = 1024; + uint32_t timeout_count = 0; + uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity; + + reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); + reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + + if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && + (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { + /* + * Nobody is using I2C engine, but engine remains active because + * someone missed to send STOP + */ + smu_v11_0_i2c_abort(control); + } else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) { + /* Nobody is using I2C engine */ + return true; + } + + /* Keep reading activity bit until it's cleared */ + do { + reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY); + + if (REG_GET_FIELD(reg_ic_clr_activity, + CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0) + return true; + + ++timeout_count; + + } while (timeout_count < IDLE_TIMEOUT); + + return false; +} + +static void smu_v11_0_i2c_init(struct i2c_adapter *control) +{ + /* Disable clock gating */ + smu_v11_0_i2c_set_clock_gating(control, false); + + if (!smu_v11_0_i2c_activity_done(control)) + DRM_WARN("I2C busy !"); + + /* Disable I2C */ + smu_v11_0_i2c_enable(control, false); + + /* Configure I2C to operate as master and in standard mode */ + smu_v11_0_i2c_configure(control); + + /* Initialize the clock to 50 kHz default */ + smu_v11_0_i2c_set_clock(control); + +} + +static void smu_v11_0_i2c_fini(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg_ic_enable_status, reg_ic_enable; + + 
smu_v11_0_i2c_enable(control, false); + + /* Double check if disabled, else force abort */ + reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); + reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && + (REG_GET_FIELD(reg_ic_enable_status, + CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { + /* + * Nobody is using I2C engine, but engine remains active because + * someone missed to send STOP + */ + smu_v11_0_i2c_abort(control); + } + + /* Restore clock gating */ + + /* + * TODO Reenabling clock gating seems to break subsequent SMU operation + * on the I2C bus. My guess is that SMU doesn't disable clock gating like + * we do here before working with the bus. So for now just don't restore + * it but later work with SMU to see if they have this issue and can + * update their code appropriately + */ + /* smu_v11_0_i2c_set_clock_gating(control, true); */ + +} + +static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Send PPSMC_MSG_RequestI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) + goto Fail; + + + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, true)) + return true; + +Fail: + return false; +} + +static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Send PPSMC_MSG_RequestI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) + goto Fail; + + /* Send PPSMC_MSG_ReleaseI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, + false)) + return true; + +Fail: + return false; +} + +/***************************** EEPROM I2C GLUE ****************************/ + +static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) +{ + uint32_t ret = 0; + + /* First 2 bytes are dummy write to set EEPROM address */ + ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP); + if (ret != I2C_OK) + goto Fail; + + /* Now read data starting with that address */ + ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2, + I2C_RESTART); + +Fail: + if (ret != I2C_OK) + DRM_ERROR("ReadData() - I2C error occurred :%x", ret); + + return ret; +} + +static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) +{ + uint32_t ret; + + ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0); + + if (ret != I2C_OK) + DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret); + else + /* + * According to EEPROM spec there is a MAX of 10 ms required for + * EEPROM to flush internal RX buffer after STOP was issued at the + * end of write transaction. During this time the EEPROM will not be + * responsive to any more commands - so wait a bit more. + * + * TODO Improve to wait for first ACK for slave address after + * internal write cycle done. 
+ */ + msleep(10); + + return ret; + +} + +static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + + if (!smu_v11_0_i2c_bus_lock(i2c)) { + DRM_ERROR("Failed to lock the bus from SMU"); + return; + } + + control->bus_locked = true; +} + +static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + WARN_ONCE(1, "This operation not supposed to run in atomic context!"); + return false; +} + +static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + + if (!smu_v11_0_i2c_bus_unlock(i2c)) { + DRM_ERROR("Failed to unlock the bus from SMU"); + return; + } + + control->bus_locked = false; +} + +static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { + .lock_bus = lock_bus, + .trylock_bus = trylock_bus, + .unlock_bus = unlock_bus, +}; + +static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msgs, int num) +{ + int i, ret; + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c_adap); + + if (!control->bus_locked) { + DRM_ERROR("I2C bus unlocked, stopping transaction!"); + return -EIO; + } + + smu_v11_0_i2c_init(i2c_adap); + + for (i = 0; i < num; i++) { + if (msgs[i].flags & I2C_M_RD) + ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); + else + ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); + + if (ret != I2C_OK) { + num = -EIO; + break; + } + } + + smu_v11_0_i2c_fini(i2c_adap); + return num; +} + +static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + + +static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = { + .master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer, + .functionality = smu_v11_0_i2c_eeprom_i2c_func, +}; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int res; + + control->owner = THIS_MODULE; + control->class = I2C_CLASS_SPD; + control->dev.parent = &adev->pdev->dev; + control->algo = &smu_v11_0_i2c_eeprom_i2c_algo; + snprintf(control->name, sizeof(control->name), "RAS EEPROM"); + control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; + + res = i2c_add_adapter(control); + if (res) + DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + + return res; +} + +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control) +{ + i2c_del_adapter(control); +} + +/* + * Keep this for future unit test if bugs arise + */ +#if 0 +#define I2C_TARGET_ADDR 0xA0 + +bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) +{ + + uint32_t ret = I2C_OK; + uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef}; + + + DRM_INFO("Begin"); + + if (!smu_v11_0_i2c_bus_lock(control)) { + DRM_ERROR("Failed to lock the bus!."); + return false; + } + + smu_v11_0_i2c_init(control); + + /* Write 0xde to address 0x0000 on the EEPROM */ + ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6); + + ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6); + + smu_v11_0_i2c_fini(control); + + smu_v11_0_i2c_bus_unlock(control); + + + DRM_INFO("End"); + return true; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h new file mode 100644 index 000000000000..973f28d68e70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h @@ -0,0 
+1,34 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU_V11_I2C_CONTROL_H +#define SMU_V11_I2C_CONTROL_H + +#include <linux/types.h> + +struct i2c_adapter; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control); +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ed89a101f73f..8e1640bc07af 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -23,7 +23,8 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -44,6 +45,7 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "mp/mp_9_0_offset.h" @@ -56,13 +58,22 @@ #include "mmhub_v1_0.h" #include "df_v1_7.h" #include "df_v3_6.h" +#include "nbio_v6_1.h" +#include "nbio_v7_0.h" +#include "nbio_v7_4.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" #include "vce_v4_0.h" #include "vcn_v1_0.h" +#include "vcn_v2_0.h" +#include "vcn_v2_5.h" #include "dce_virtual.h" #include "mxgpu_ai.h" +#include "amdgpu_smu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" +#include <uapi/linux/kfd_ioctl.h> #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 @@ -83,8 +94,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -98,8 +109,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -109,6 +120,49 @@ static void 
soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -211,7 +265,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_memsize(adev); + return adev->nbio.funcs->get_memsize(adev); } static u32 soc15_get_xclk(struct amdgpu_device *adev) @@ -229,7 +283,7 @@ void soc15_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) @@ -269,15 +323,6 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, return true; } -struct soc15_allowed_register_entry { - uint32_t hwip; - uint32_t inst; - uint32_t seg; - uint32_t reg_offset; - bool grbm_indexed; -}; - - static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, @@ -294,6 +339,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, @@ -382,9 +428,17 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, } else { tmp = RREG32(reg); tmp &= ~(entry->and_mask); - tmp |= entry->or_mask; + tmp |= (entry->or_mask & entry->and_mask); } - WREG32(reg, tmp); + + if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) || + reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)) + WREG32_RLC(reg, tmp); + else + WREG32(reg, tmp); + } } @@ -392,6 +446,7 @@ void soc15_program_register_sequence(struct 
amdgpu_device *adev, static int soc15_asic_mode1_reset(struct amdgpu_device *adev) { u32 i; + int ret = 0; amdgpu_atombios_scratch_regs_engine_hung(adev, true); @@ -402,13 +457,15 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) pci_save_state(adev->pdev); - psp_gpu_reset(adev); + ret = psp_gpu_reset(adev); + if (ret) + dev_err(adev->dev, "GPU mode1 reset failed\n"); pci_restore_state(adev->pdev); /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio_funcs->get_memsize(adev); + u32 memsize = adev->nbio.funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -417,63 +474,129 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) amdgpu_atombios_scratch_regs_engine_hung(adev, false); - return 0; + return ret; } static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + if (is_support_sw_smu(adev)) { + struct smu_context *smu = &adev->smu; - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } + *cap = smu_baco_is_support(smu); + return 0; + } else { + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { + *cap = false; + return -ENOENT; + } - return pp_funcs->get_asic_baco_capability(pp_handle, cap); + return pp_funcs->get_asic_baco_capability(pp_handle, cap); + } } static int soc15_asic_baco_reset(struct amdgpu_device *adev) { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; + /* avoid NBIF got stuck when do RAS recovery in BACO reset */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; + dev_info(adev->dev, "GPU BACO reset\n"); - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; + if (is_support_sw_smu(adev)) { + struct smu_context *smu = &adev->smu; - dev_info(adev->dev, "GPU BACO reset\n"); + if (smu_baco_reset(smu)) + return -EIO; + } else { + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 1)) + return -EIO; + + /* exit BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 0)) + return -EIO; + } + + /* re-enable doorbell interrupt after BACO exit */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; } -static int soc15_asic_reset(struct amdgpu_device *adev) +static int soc15_mode2_reset(struct amdgpu_device *adev) +{ + if (is_support_sw_smu(adev)) + return smu_mode2_reset(&adev->smu); + if (!adev->powerplay.pp_funcs || + !adev->powerplay.pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); +} + +static enum amd_reset_method +soc15_asic_reset_method(struct amdgpu_device *adev) { - int ret; bool baco_reset; switch (adev->asic_type) { + case CHIP_RAVEN: + case 
CHIP_RENOIR: + return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: + case CHIP_VEGA12: soc15_asic_get_baco_capability(adev, &baco_reset); break; + case CHIP_VEGA20: + if (adev->psp.sos_fw_version >= 0x80067) + soc15_asic_get_baco_capability(adev, &baco_reset); + else + baco_reset = false; + if (baco_reset) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (hive || (ras && ras->supported)) + baco_reset = false; + } + break; default: baco_reset = false; break; } if (baco_reset) - ret = soc15_asic_baco_reset(adev); + return AMD_RESET_METHOD_BACO; else - ret = soc15_asic_mode1_reset(adev); + return AMD_RESET_METHOD_MODE1; +} - return ret; +static int soc15_asic_reset(struct amdgpu_device *adev) +{ + switch (soc15_asic_reset_method(adev)) { + case AMD_RESET_METHOD_BACO: + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + return soc15_asic_baco_reset(adev); + case AMD_RESET_METHOD_MODE2: + return soc15_mode2_reset(adev); + default: + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); + return soc15_asic_mode1_reset(adev); + } } /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -532,8 +655,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev) static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio_funcs->enable_doorbell_aperture(adev, enable); - adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -547,7 +670,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_rev_id(adev); + return adev->nbio.funcs->get_rev_id(adev); } int soc15_set_ip_blocks(struct amdgpu_device *adev) @@ -557,32 +680,41 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_RENOIR: vega10_reg_base_init(adev); break; case CHIP_VEGA20: vega20_reg_base_init(adev); break; + case CHIP_ARCTURUS: + arct_reg_base_init(adev); + break; default: return -EINVAL; } - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; - if (adev->flags & AMD_IS_APU) - adev->nbio_funcs = &nbio_v7_0_funcs; - else if (adev->asic_type == CHIP_VEGA20) - adev->nbio_funcs = &nbio_v7_4_funcs; - else - adev->nbio_funcs = &nbio_v6_1_funcs; + if (adev->flags & AMD_IS_APU) { + adev->nbio.funcs = &nbio_v7_0_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg; + } else if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { + adev->nbio.funcs = &nbio_v7_4_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; + } else { + adev->nbio.funcs = &nbio_v6_1_funcs; + adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg; + } - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->df_funcs = &df_v3_6_funcs; else adev->df_funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); - adev->nbio_funcs->detect_hw_virt(adev); + adev->nbio.funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; @@ -593,24 +725,38 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA20: 
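+		/*
+		 * Registration order for Vega10/12/20 (summary of the calls
+		 * below): common and GMC first, then PSP/IH (PSP must come
+		 * before IH under SR-IOV), GFX, SDMA, the SMU or powerplay
+		 * block on bare metal, display, and finally UVD/VCE (skipped
+		 * on Vega20 under SR-IOV).
+		 */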
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + + /* For Vega10 SR-IOV, PSP need to be initialized before IH */ + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + if (!amdgpu_sriov_vf(adev)) { + if (is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) { amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); @@ -631,11 +777,51 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." 
#endif amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; + case CHIP_ARCTURUS: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + } + + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + + if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + break; + case CHIP_RENOIR: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); + if (is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + break; default: return -EINVAL; } @@ -645,14 +831,14 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio_funcs->hdp_flush(adev, ring); + adev->nbio.funcs->hdp_flush(adev, ring); } static void soc15_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); else amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); @@ -673,11 +859,12 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, /* This reports 0 on APUs, so return to avoid writing/reading registers * that may or may not be different from their GPU counterparts */ - if (adev->flags & AMD_IS_APU) - return; + if (adev->flags & AMD_IS_APU) + return; /* Set the 2 events that we wish to watch, defined above */ - /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ + /* Reg 40 is # received msgs */ + /* Reg 104 is # of posted requests sent */ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); @@ -709,10 +896,65 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This 
reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs */ + /* Reg 108 is # of posted requests sent on VG20 */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT1_SEL, 108); + + /* Write to enable desired perf counters */ + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32); + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32); +} + static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; + /* Just return false for soc15 GPUs. Reset does not seem to + * be necessary. + */ + if (!amdgpu_passthrough(adev)) + return false; + if (adev->flags & AMD_IS_APU) return false; @@ -726,12 +968,25 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, .read_bios_from_rom = &soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, @@ -743,6 +998,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .init_doorbell_index = &vega10_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -760,18 +1016,25 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, - .get_pcie_usage = &soc15_get_pcie_usage, + .get_pcie_usage = &vega20_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .reset_method = &soc15_asic_reset_method }; static int soc15_common_early_init(void *handle) { +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr 
= adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; adev->pcie_wreg = &soc15_pcie_wreg; + adev->pcie_rreg64 = &soc15_pcie_rreg64; + adev->pcie_wreg64 = &soc15_pcie_wreg64; adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; @@ -882,9 +1145,12 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } else if (adev->pdev->device == 0x15d8) { - adev->cg_flags = AMD_CG_SUPPORT_GFX_MGLS | + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | @@ -924,13 +1190,53 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } - - if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; + break; + case CHIP_ARCTURUS: + adev->asic_funcs = &vega20_asic_funcs; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_IH_CG; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x32; + break; + case CHIP_RENOIR: + adev->asic_funcs = &soc15_asic_funcs; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_DF_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 0x91; break; default: /* FIXME: not supported yet */ @@ -948,11 +1254,15 @@ static int soc15_common_early_init(void *handle) static int soc15_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r = 0; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - return 0; + if (adev->nbio.funcs->ras_late_init) + r = adev->nbio.funcs->ras_late_init(adev); + + return r; } static int soc15_common_sw_init(void *handle) @@ -962,11 +1272,17 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); + adev->df_funcs->sw_init(adev); + return 0; } static int soc15_common_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_nbio_ras_fini(adev); + adev->df_funcs->sw_fini(adev); return 0; } @@ -975,15 +1291,18 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) int i; struct amdgpu_ring *ring; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = 
&adev->sdma.instance[i].ring; - adev->nbio_funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - } + /* sdma/ih doorbell range are programed by hypervisor */ + if (!amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + adev->nbio.funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } - adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + } } static int soc15_common_hw_init(void *handle) @@ -995,7 +1314,14 @@ static int soc15_common_hw_init(void *handle) /* enable aspm */ soc15_program_aspm(adev); /* setup nbio registers */ - adev->nbio_funcs->init_registers(adev); + adev->nbio.funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); + /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not @@ -1017,6 +1343,14 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); + if (adev->nbio.ras_if && + amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (adev->nbio.funcs->init_ras_controller_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } + return 0; } @@ -1053,7 +1387,8 @@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable { uint32_t def, data; - if (adev->asic_type == CHIP_VEGA20) { + if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -1156,9 +1491,9 @@ static int soc15_common_set_clockgating_state(void *handle, case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, + adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, + adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1172,9 +1507,10 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, + case CHIP_RENOIR: + adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, + adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1185,6 +1521,10 @@ static int soc15_common_set_clockgating_state(void *handle, soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; + case CHIP_ARCTURUS: + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + break; default: break; } @@ -1199,7 +1539,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio_funcs->get_clockgating_state(adev, flags); + adev->nbio.funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index a66c8bfbbaa6..57af489a5de3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -28,8 +28,8 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 extern const struct amd_ip_funcs soc15_common_ip_funcs; @@ -42,11 +42,33 @@ struct soc15_reg_golden { u32 or_mask; }; +struct soc15_reg_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t reg_value; + uint32_t se_num; + uint32_t instance; +}; + +struct soc15_allowed_register_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + bool grbm_indexed; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } +#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT + void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int soc15_set_ip_blocks(struct amdgpu_device *adev); @@ -57,6 +79,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); +int arct_reg_base_init(struct amdgpu_device *adev); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 49c262540940..839f186e1182 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,26 +69,61 @@ } \ } while (0) -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) +#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a))) +#define WREG32_RLC(reg, value) \ + do { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ + uint32_t i = 0; \ + uint32_t retries = 50000; \ + uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ + uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \ + uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \ + WREG32(r0, value); \ + WREG32(r1, (reg | 0x80000000)); \ + WREG32(spare_int, 0x1); \ + for (i = 0; i < retries; i++) { \ + u32 tmp = RREG32(r1); \ + if (!(tmp & 0x80000000)) \ + break; \ + 
udelay(10); \ + } \ + if (i >= retries) \ + pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \ + } else { \ + WREG32(reg, value); \ + } \ + } while (0) -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ + uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ + uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ + uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ + uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \ + if (target_reg == grbm_cntl) \ + WREG32(r2, value); \ + else if (target_reg == grbm_idx) \ + WREG32(r3, value); \ + WREG32(target_reg, value); \ + } else { \ + WREG32(target_reg, value); \ + } \ } while (0) -#endif +#define WREG32_SOC15_RLC(ip, inst, reg, value) \ + do { \ + uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ + WREG32_RLC(target_reg, value); \ + } while (0) + +#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ + WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ + (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ + & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ + WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h new file mode 100644 index 000000000000..ca7d05993ca2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -0,0 +1,118 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef _TA_RAS_IF_H +#define _TA_RAS_IF_H + +/* Responses have bit 31 set */ +#define RSP_ID_MASK (1U << 31) +#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) + +/* RAS related enumerations */ +/**********************************************************/ +enum ras_command { + TA_RAS_COMMAND__ENABLE_FEATURES = 0, + TA_RAS_COMMAND__DISABLE_FEATURES, + TA_RAS_COMMAND__TRIGGER_ERROR, +}; + +enum ta_ras_status { + TA_RAS_STATUS__SUCCESS = 0x00, + TA_RAS_STATUS__RESET_NEEDED = 0x01, + TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0x02, + TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0x03, + TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0x04, + TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05, + TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0x06, + TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0x07, + TA_RAS_STATUS__ERROR_TIMEOUT = 0x08, + TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0x09, + TA_RAS_STATUS__ERROR_GENERIC = 0x10, +}; + +enum ta_ras_block { + TA_RAS_BLOCK__UMC = 0, + TA_RAS_BLOCK__SDMA, + TA_RAS_BLOCK__GFX, + TA_RAS_BLOCK__MMHUB, + TA_RAS_BLOCK__ATHUB, + TA_RAS_BLOCK__PCIE_BIF, + TA_RAS_BLOCK__HDP, + TA_RAS_BLOCK__XGMI_WAFL, + TA_RAS_BLOCK__DF, + TA_RAS_BLOCK__SMN, + TA_RAS_BLOCK__SEM, + TA_RAS_BLOCK__MP0, + TA_RAS_BLOCK__MP1, + TA_RAS_BLOCK__FUSE, + TA_NUM_BLOCK_MAX +}; + +enum ta_ras_error_type { + TA_RAS_ERROR__NONE = 0, + TA_RAS_ERROR__PARITY = 1, + TA_RAS_ERROR__SINGLE_CORRECTABLE = 2, + TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4, + TA_RAS_ERROR__POISON = 8, +}; + +/* Input/output structures for RAS commands */ +/**********************************************************/ + +struct ta_ras_enable_features_input { + enum ta_ras_block block_id; + enum ta_ras_error_type error_type; +}; + +struct ta_ras_disable_features_input { + enum ta_ras_block block_id; + enum ta_ras_error_type error_type; +}; + +struct ta_ras_trigger_error_input { + enum ta_ras_block block_id; // ras-block. i.e. umc, gfx + enum ta_ras_error_type inject_error_type; // type of error. i.e. single_correctable + uint32_t sub_block_index; // mem block. i.e. hbm, sram etc. + uint64_t address; // explicit address of error + uint64_t value; // method if error injection. i.e persistent, coherent etc. +}; + +/* Common input structure for RAS callbacks */ +/**********************************************************/ +union ta_ras_cmd_input { + struct ta_ras_enable_features_input enable_features; + struct ta_ras_disable_features_input disable_features; + struct ta_ras_trigger_error_input trigger_error; +}; + +/* Shared Memory structures */ +/**********************************************************/ +struct ta_ras_shared_memory { + uint32_t cmd_id; + uint32_t resp_id; + enum ta_ras_status ras_status; + uint32_t reserved; + union ta_ras_cmd_input ras_in_message; +}; + +#endif // TL_RAS_IF_H_ diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index a20b711a6756..e40140bf6699 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c new file mode 100644 index 000000000000..0d6b50528d76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_0.h" +#include "amdgpu.h" + +static void umc_v6_0_init_registers(struct amdgpu_device *adev) +{ + unsigned i,j; + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002); +} + +const struct amdgpu_umc_funcs umc_v6_0_funcs = { + .init_registers = umc_v6_0_init_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h new file mode 100644 index 000000000000..109f1a57a46e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_0_H__ +#define __UMC_V6_0_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +extern const struct amdgpu_umc_funcs umc_v6_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c new file mode 100644 index 000000000000..47c4b96b14d1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -0,0 +1,281 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_1.h" +#include "amdgpu_ras.h" +#include "amdgpu.h" + +#include "rsmu/rsmu_0_0_2_offset.h" +#include "rsmu/rsmu_0_0_2_sh_mask.h" +#include "umc/umc_6_1_1_offset.h" +#include "umc/umc_6_1_1_sh_mask.h" + +#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 + +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) + +const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { + {2, 18, 11, 27}, {4, 20, 13, 29}, + {1, 17, 8, 24}, {7, 23, 14, 30}, + {10, 26, 3, 19}, {12, 28, 5, 21}, + {9, 25, 0, 16}, {15, 31, 6, 22} +}; + +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, + uint32_t umc_instance) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_INSTANCE, umc_instance); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_WREN, 1 << umc_instance); + WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + rsmu_umc_index); +} + +static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 0); +} + +static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + return REG_GET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_INSTANCE); +} + +static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, 
mmUMCCH0_0_EccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the lower chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the higher chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* check the MCUMC_STATUS */ + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v6_1_query_error_count(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint32_t umc_reg_offset, + uint32_t channel_index) +{ + umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, + &(err_data->ue_count)); +} + +static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); +} + +static void umc_v6_1_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, uint32_t channel_index) +{ + uint32_t lsb, mc_umc_status_addr; + uint64_t mc_umc_status, err_addr, retired_page; + struct eeprom_table_record *err_rec; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* skip error address process if -ENOMEM */ + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + return; + } + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; + mc_umc_status = 
RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + + /* the lowest lsb bits should be ignored */ + lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + err_addr &= ~((0x1ULL << lsb) - 1); + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) + == 1) { + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev); + + err_data->err_addr_cnt++; + } + } + + /* clear umc status */ + WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); +} + +static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); +} + +static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, uint32_t channel_index) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrInt, 0x1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); +} + +static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) +{ + void *ras_error_status = NULL; + + amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); +} + +const struct amdgpu_umc_funcs umc_v6_1_funcs = { + .err_cnt_init = umc_v6_1_err_cnt_init, + .ras_late_init = amdgpu_umc_ras_late_init, + .query_ras_error_count = umc_v6_1_query_ras_error_count, + .query_ras_error_address = umc_v6_1_query_ras_error_address, + .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, + .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h new file mode 100644 index 
000000000000..dab9cbd292c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -0,0 +1,51 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_1_H__ +#define __UMC_V6_1_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* HBM Memory Channel Width */ +#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128 +/* number of umc channel instance with memory map register access */ +#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4 +/* number of umc instance with memory map register access */ +#define UMC_V6_1_UMC_INSTANCE_NUM 8 +/* total channel instances in one umc block */ +#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM) +/* UMC regiser per channel offset */ +#define UMC_V6_1_PER_CHANNEL_OFFSET 0x800 + +/* EccErrCnt max value */ +#define UMC_V6_1_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V6_1_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) + +extern const struct amdgpu_umc_funcs umc_v6_1_funcs; +extern const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index c4fb58667fd4..82abd8e728ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_uvd.h" #include "cikd.h" @@ -491,7 +491,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, .set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 52bd8a654734..01e62fb8e6e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -22,8 +22,9 @@ * Authors: Christian König <christian.koenig@amd.com> */ +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" 
#include "amdgpu_uvd.h" #include "vid.h" @@ -506,7 +507,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -849,6 +850,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, .set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index c9edddf9f88a..217084d56ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_uvd.h" #include "vid.h" @@ -170,20 +170,23 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r; r = amdgpu_ring_alloc(ring, 16); if (r) return r; + + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -203,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -217,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -265,13 +269,14 @@ err: */ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -279,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] 
= 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -324,13 +329,20 @@ err: static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); + r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -342,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -957,7 +971,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -1502,6 +1516,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1527,6 +1542,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1555,6 +1571,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_enc_ring_get_rptr, .get_wptr = uvd_v6_0_enc_ring_get_wptr, .set_wptr = uvd_v6_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index dc461df48da0..0995378d8263 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_uvd.h" #include "soc15.h" @@ -175,7 +175,7 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r; @@ -185,13 +185,16 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 16); if (r) return r; + + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -211,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -225,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = 
&job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -272,13 +276,14 @@ err: * Close up a stream for HW test or if userspace failed to do so */ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo *bo, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t dummy; + uint64_t addr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -286,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl return r; ib = &job->ibs[0]; - dummy = ib->gpu_addr + 1024; + addr = amdgpu_bo_gpu_offset(bo); ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(dummy); - ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = addr; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; @@ -331,13 +336,20 @@ err: static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; + struct amdgpu_bo *bo = NULL; long r; - r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, NULL); + if (r) + return r; + + r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL); if (r) goto error; - r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); + r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence); if (r) goto error; @@ -349,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -787,10 +801,13 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) 0xFFFFFFFF, 0x00000004); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0); offset = 0; } else { MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), @@ -798,10 +815,11 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, 
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->uvd.inst[i].gpu_addr)); offset = size; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), @@ -1223,7 +1241,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -1755,7 +1773,8 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, - .vmhub = AMDGPU_MMHUB, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, @@ -1787,7 +1806,8 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, - .vmhub = AMDGPU_MMHUB, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index bed78a778e3f..b6837fcfdba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -26,7 +26,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_vce.h" #include "cikd.h" @@ -283,7 +283,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) } if (vce_v2_0_wait_for_idle(adev)) { - DRM_INFO("VCE is busy, Can't set clock gateing"); + DRM_INFO("VCE is busy, Can't set clock gating"); return 0; } @@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v2_0_ring_get_rptr, .get_wptr = vce_v2_0_ring_get_wptr, .set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6ec65cf11112..475ae68f38f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -26,7 +26,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_vce.h" #include "vid.h" @@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, @@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index aadc3e66ebd7..683701cf7270 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -25,7 +25,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include 
"amdgpu.h" #include "amdgpu_vce.h" #include "soc15.h" @@ -244,13 +244,18 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + offset = AMDGPU_VCE_FIRMWARE_OFFSET; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; + uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi; + uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, - mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), - adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); + (tmr_mc_addr >> 40) & 0xff); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); } else { MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), @@ -258,6 +263,9 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), (adev->vce.gpu_addr >> 40) & 0xff); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), + offset & ~0x0f000000); + } MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), @@ -272,10 +280,7 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff); - offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V4_0_FW_SIZE; - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), - offset & ~0x0f000000); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; @@ -382,6 +387,7 @@ static int vce_v4_0_start(struct amdgpu_device *adev) static int vce_v4_0_stop(struct amdgpu_device *adev) { + /* Disable VCPU */ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); /* hold on ECPU */ @@ -389,8 +395,8 @@ static int vce_v4_0_stop(struct amdgpu_device *adev) VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); - /* clear BUSY flag */ - WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); + /* clear VCE_STATUS */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0); /* Set Clock-Gating off */ /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) @@ -922,6 +928,7 @@ static int vce_v4_0_set_clockgating_state(void *handle, return 0; } +#endif static int vce_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) @@ -935,16 +942,11 @@ static int vce_v4_0_set_powergating_state(void *handle, */ struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE)) - return 0; - if (state == AMD_PG_STATE_GATE) - /* XXX do we need a vce_v4_0_stop()? 
*/ - return 0; + return vce_v4_0_stop(adev); else return vce_v4_0_start(adev); } -#endif static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) @@ -1059,7 +1061,7 @@ const struct amd_ip_funcs vce_v4_0_ip_funcs = { .soft_reset = NULL /* vce_v4_0_soft_reset */, .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, .set_clockgating_state = vce_v4_0_set_clockgating_state, - .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */, + .set_powergating_state = vce_v4_0_set_powergating_state, }; static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { @@ -1067,7 +1069,8 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, - .vmhub = AMDGPU_MMHUB, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3dbc51f9d3b9..b4f84a820a44 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_vcn.h" #include "soc15.h" @@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); /** * vcn_v1_0_early_init - set function pointers @@ -61,6 +63,7 @@ static int vcn_v1_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v1_0_set_dec_ring_funcs(adev); @@ -85,20 +88,21 @@ static int vcn_v1_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq); if (r) return r; /* VCN ENC TRAP */ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; } /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq); if (r) return r; @@ -120,27 +124,42 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; + adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 = + SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = adev->vcn.inst->external.data0 = + SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = adev->vcn.inst->external.data1 = + SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = 
adev->vcn.inst->external.cmd = + SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = adev->vcn.inst->external.nop = + SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; - return r; + adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch = + SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); + + return 0; } /** @@ -174,7 +193,7 @@ static int vcn_v1_0_sw_fini(void *handle) static int vcn_v1_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; r = amdgpu_ring_test_helper(ring); @@ -182,14 +201,13 @@ static int vcn_v1_0_hw_init(void *handle) goto done; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; - ring->sched.ready = true; + ring = &adev->vcn.inst->ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) goto done; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -212,7 +230,7 @@ done: static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || RREG32_SOC15(VCN, 0, mmUVD_STATUS)) @@ -287,9 +305,9 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -299,17 +317,17 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, 
AMDGPU_VCN_CONTEXT_SIZE); @@ -357,9 +375,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); offset = size; WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); @@ -369,9 +387,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, @@ -379,10 +397,10 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 2: context */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, @@ -762,7 +780,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -915,21 +933,21 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); @@ -951,7 +969,7 @@ static 
int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1089,7 +1107,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); /* initialize JPEG wptr */ - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); /* copy patch commands to the jpeg ring */ @@ -1204,6 +1222,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) return r; } +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + int ret_code; + uint32_t reg_data = 0; + uint32_t reg_data2 = 0; + struct amdgpu_ring *ring; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG non-jpeg */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst->ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst->ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst->ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg non-jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + + if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) + 
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* Make sure JPRG Snoop is disabled before sending the pause */ + reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + + /* pause DPG jpeg */ + reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst->ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + + ring = &adev->vcn.inst->ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.jpeg = new_state->jpeg; + } + + return 0; +} + static bool vcn_v1_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1453,7 +1597,7 @@ static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1470,7 +1614,7 @@ static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); @@ -1487,7 +1631,7 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else @@ -1971,16 +2115,16 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); break; case 119: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case 120: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; case 126: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2054,7 +2198,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, 
.align_mask = 0xf, .support_64bit_ptrs = false, - .vmhub = AMDGPU_MMHUB, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2087,7 +2232,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, - .vmhub = AMDGPU_MMHUB, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, @@ -2118,7 +2264,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { .align_mask = 0xf, .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, - .vmhub = AMDGPU_MMHUB, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, .extra_dw = 64, .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, @@ -2149,7 +2296,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2158,14 +2305,14 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; + adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); } @@ -2176,8 +2323,8 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v1_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c new file mode 100644 index 000000000000..38f787a560cb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -0,0 +1,2292 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_pm.h" +#include "amdgpu_psp.h" + +#include "vcn/vcn_2_0_0_offset.h" +#include "vcn/vcn_2_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x504 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x505 +#define mmUVD_NO_OP_INTERNAL_OFFSET 0x53f +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x54a +#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x1e1 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x5a6 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 + +#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff +#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 +#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a +#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb +#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf +#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 +#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed +#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 +#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 +#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 + +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + +static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); + +/** + * vcn_v2_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v2_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->vcn.num_vcn_inst = 1; + adev->vcn.num_enc_rings = 2; + + vcn_v2_0_set_dec_ring_funcs(adev); + vcn_v2_0_set_enc_ring_funcs(adev); + vcn_v2_0_set_jpeg_ring_funcs(adev); + vcn_v2_0_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v2_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v2_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, r; + 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, + &adev->vcn.inst->irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, + &adev->vcn.inst->irq); + if (r) + return r; + } + + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq); + if (r) + return r; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + ring = &adev->vcn.inst->ring_dec; + + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; + + sprintf(ring->name, "vcn_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + if (r) + return r; + + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst->ring_enc[i]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; + sprintf(ring->name, "vcn_enc%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + if (r) + return r; + } + + ring = &adev->vcn.inst->ring_jpeg; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + sprintf(ring->name, "vcn_jpeg"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); + if (r) + return r; + + adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; + + adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * vcn_v2_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static 
int vcn_v2_0_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v2_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; + int i, r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, 0); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst->ring_enc[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + + ring = &adev->vcn.inst->ring_jpeg; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v2_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v2_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; + int i; + + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, 0, mmUVD_STATUS))) + vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst->ring_enc[i]; + ring->sched.ready = false; + } + + ring = &adev->vcn.inst->ring_jpeg; + ring->sched.ready = false; + + return 0; +} + +/** + * vcn_v2_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v2_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v2_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v2_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v2_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v2_0_hw_init(adev); + + return r; +} + +/** + * vcn_v2_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst->gpu_addr)); + 
offset = size; + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); +} + +static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + 
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v2_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Disable clock gating for VCN block + */ +static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__SCPU_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | 
UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); +} + +static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, 
indirect); +} + +/** + * jpeg_v2_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg; + uint32_t tmp; + int r = 0; + + /* disable power gating */ + tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); + + SOC15_WAIT_ON_RREG(VCN, 0, + mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + return r; + } + + /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */ + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); + + /* JPEG disable CGC */ + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); + + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); + tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v2_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_0_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int r = 0; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable JPEG CGC */ + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); + + + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); + tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); + + /* enable power gating */ + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)); + tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); + + tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_PGFSM_CONFIG), tmp); + + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, + (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + return r; + } + + return r; +} + +/** + * vcn_v2_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Enable clock gating for VCN block + */ +static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); +} + +static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF, ret); + } else { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 1 << 
UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF, ret); + } + + /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS, + * UVDU_PWR_STATUS are 0 (power on) */ + + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); +} + +static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); + + + data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + + data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF, ret); + } +} + +static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) +{ + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; + uint32_t rb_bufsz, tmp; + + vcn_v2_0_enable_static_power_gating(adev); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, 
mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v2_0_mc_resume_dpg_mode(adev, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* release VCPU reset to boot */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_CTRL2), + 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - + (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + +static int vcn_v2_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; + uint32_t rb_bufsz, tmp; + uint32_t lmi_swap_cntl; + int i, j, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); + if (r) + return r; + goto jpeg; + } + + vcn_v2_0_disable_static_power_gating(adev); + + /* 
set uvd status busy */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); + + /*SW clock gating */ + vcn_v2_0_disable_clock_gating(adev); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL); + WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v2_0_mc_resume(adev); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET, tmp); + + /* disable byte swapping */ + lmi_swap_cntl = 0; +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; +#endif + WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + + for (i = 0; i < 10; ++i) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); 
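/*
 * For reference, REG_SET_FIELD() is the usual SOC15 helper that shifts a field
 * value into place and masks it into an existing register value; roughly (an
 * illustrative expansion, the real macro builds the mask and shift identifiers
 * from the register and field tokens):
 *
 *   #define REG_SET_FIELD(orig, reg, field, val) \
 *       (((orig) & ~reg##__##field##_MASK) | \
 *        (((val) << reg##__##field##__SHIFT) & reg##__##field##_MASK))
 *
 * so this sequence accumulates RB_BUFSZ, RB_BLKSZ, RB_NO_FETCH, RB_NO_UPDATE
 * and RB_RPTR_WR_EN into a single UVD_RBC_RB_CNTL value before it is written.
 */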
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst->ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst->ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + +jpeg: + r = jpeg_v2_0_start(adev); + + return r; +} + +static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + +static int vcn_v2_0_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int r; + + r = jpeg_v2_0_stop(adev); + if (r) + return r; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_0_stop_dpg_mode(adev); + if (r) + return r; + goto power_off; + } + + /* wait for uvd idle */ + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* stall UMC channel */ + tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, 0, 
mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* reset LMI UMC */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + + /* reset LMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* clear status */ + WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); + + vcn_v2_0_enable_clock_gating(adev); + vcn_v2_0_enable_static_power_gating(adev); + +power_off: + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst->ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst->ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +static bool vcn_v2_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); +} + +static int vcn_v2_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + + return ret; +} + +static int 
vcn_v2_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + /* wait for STATUS to clear */ + if (vcn_v2_0_is_idle(handle)) + return -EBUSY; + vcn_v2_0_enable_clock_gating(adev); + } else { + /* disable HW gating and enable Sw gating */ + vcn_v2_0_disable_clock_gating(adev); + } + return 0; +} + +/** + * vcn_v2_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * vcn_v2_0_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); +} + +/** + * vcn_v2_0_dec_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); +} + +/** + * vcn_v2_0_dec_ring_insert_nop - insert a nop command + * + * @ring: amdgpu_ring pointer + * + * Write a nop command to the ring. + */ +void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_device *adev = ring->adev; + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0)); + amdgpu_ring_write(ring, 0); + } +} + +/** + * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. 
+ */ +void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, addr & 0xffffffff); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); +} + +/** + * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0)); + amdgpu_ring_write(ring, vmid); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0)); + amdgpu_ring_write(ring, ib->length_dw); +} + +void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, reg << 2); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0)); + amdgpu_ring_write(ring, mask); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); +} + +void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, reg << 2); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); +} + +/** + * vcn_v2_0_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring 
pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst->ring_enc[0]) + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); +} + + /** + * vcn_v2_0_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst->ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + } +} + + /** + * vcn_v2_0_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst->ring_enc[0]) { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +/** + * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write enc a fence and a trap command to the ring. 
+ */ +void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); +} + +void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_END); +} + +/** + * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write enc ring commands to execute the indirect buffer + */ +void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, VCN_ENC_CMD_IB); + amdgpu_ring_write(ring, vmid); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + lower_32_bits(pd_addr), 0xffffffff); +} + +void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +/** + * vcn_v2_0_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v2_0_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v2_0_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * vcn_v2_0_jpeg_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. 
+ */ +void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * vcn_v2_0_jpeg_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. 
+ */ +void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); +} + +void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + 
PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: + amdgpu_fence_process(&adev->vcn.inst->ring_dec); + break; + case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); + break; + case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); + break; + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 4); + if (r) + return r; + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + + +static int vcn_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the VCN block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. 
We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v2_0_stop(adev); + else + ret = vcn_v2_0_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + return ret; +} + +static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { + .name = "vcn_v2_0", + .early_init = vcn_v2_0_early_init, + .late_init = NULL, + .sw_init = vcn_v2_0_sw_init, + .sw_fini = vcn_v2_0_sw_fini, + .hw_init = vcn_v2_0_hw_init, + .hw_fini = vcn_v2_0_hw_fini, + .suspend = vcn_v2_0_suspend, + .resume = vcn_v2_0_resume, + .is_idle = vcn_v2_0_is_idle, + .wait_for_idle = vcn_v2_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v2_0_set_clockgating_state, + .set_powergating_state = vcn_v2_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v2_0_dec_ring_get_rptr, + .get_wptr = vcn_v2_0_dec_ring_get_wptr, + .set_wptr = vcn_v2_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = vcn_v2_0_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v2_0_enc_ring_get_rptr, + .get_wptr = vcn_v2_0_enc_ring_get_wptr, + .set_wptr = vcn_v2_0_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + 
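/*
 * Sanity check on the dword budgets declared above, counted from the emit
 * helpers earlier in this file: the decode-ring fence writes seven
 * register/value pairs (14 dwords, hence "14 + 14" for the two VM fences) and
 * its IB emit writes four pairs (8 dwords, matching emit_ib_size = 8); the
 * encode-ring fence and IB paths are five dwords each (FENCE/addr/hi/seq/TRAP
 * and IB/vmid/lo/hi/len), matching "5 + 5" and emit_ib_size = 5.
 */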
.vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, + .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, + .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ + 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ + .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, + .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v2_0_jpeg_ring_nop, + .insert_start = vcn_v2_0_jpeg_ring_insert_start, + .insert_end = vcn_v2_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; + DRM_INFO("VCN decode is enabled in VM mode\n"); +} + +static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; + + DRM_INFO("VCN encode is enabled in VM mode\n"); +} + +static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; + DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { + .set = vcn_v2_0_set_interrupt_state, + .process = vcn_v2_0_process_interrupt, +}; + +static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs; +} + +const struct amdgpu_ip_block_version vcn_v2_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vcn_v2_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h new file mode 100644 index 000000000000..8467292f32e5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -0,0 +1,67 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_0_H__ +#define __VCN_V2_0_H__ + +extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val); + +extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags); +extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + +extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count); + +extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; + +#endif /* __VCN_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c new file mode 100644 index 000000000000..93edf9193a7b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -0,0 +1,1411 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_2_5_offset.h" +#include "vcn/vcn_2_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11 +#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29 +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66 +#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c + +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state); + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +/** + * vcn_v2_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v2_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) { + u32 harvest; + int i; + + adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->vcn.harvest_config |= 1 << i; + } + + if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | + AMDGPU_VCN_HARVEST_VCN1)) + /* both instances are harvested, disable the block */ + return -ENOENT; + } else + adev->vcn.num_vcn_inst = 1; + + adev->vcn.num_enc_rings = 2; + + vcn_v2_5_set_dec_ring_funcs(adev); + vcn_v2_5_set_enc_ring_funcs(adev); + vcn_v2_5_set_jpeg_ring_funcs(adev); + vcn_v2_5_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v2_5_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v2_5_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, j, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = 
amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); + if (r) + return r; + } + + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); + if (r) + return r; + } + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + + adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); + + ring = &adev->vcn.inst[j].ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; + sprintf(ring->name, "vcn_dec_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; + sprintf(ring->name, "vcn_enc_%d.%d", j, i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + + ring = &adev->vcn.inst[j].ring_jpeg; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; + sprintf(ring->name, "vcn_jpeg_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v2_5_sw_fini - sw 
fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v2_5_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v2_5_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j, r; + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + ring = &adev->vcn.inst[j].ring_dec; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, j); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + + ring = &adev->vcn.inst[j].ring_jpeg; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully.\n"); + + return r; +} + +/** + * vcn_v2_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v2_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ring = &adev->vcn.inst[i].ring_dec; + + if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->sched.ready = false; + } + + ring = &adev->vcn.inst[i].ring_jpeg; + ring->sched.ready = false; + } + + return 0; +} + +/** + * vcn_v2_5_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v2_5_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v2_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v2_5_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v2_5_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v2_5_hw_init(adev); + + return r; +} + +/** + * vcn_v2_5_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + 
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = size; + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + } +} + +/** + * vcn_v2_5_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * + * Disable clock gating for VCN block + */ +static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + int ret = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | 
UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +/** + * vcn_v2_5_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * + * Enable clock gating for VCN block + */ +static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +/** 
+ * jpeg_v2_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t tmp; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ring = &adev->vcn.inst[i].ring_jpeg; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + /* MJPEG global tiling registers */ + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); + } + + return 0; +} + +/** + * jpeg_v2_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + } + + return 0; +} + +static int vcn_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if 
(adev->vcn.harvest_config & (1 << i)) + continue; + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* set uvd status busy */ + tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); + } + + /*SW clock gating */ + vcn_v2_5_disable_clock_gating(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); + tmp &= ~0xff; + WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + } + + vcn_v2_5_mc_resume(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* VCN global tiling registers */ + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (k = 0; k < 10; ++k) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, i, mmUVD_STATUS); + if (status & 2) + break; + if (amdgpu_emu_mode == 1) + msleep(500); + else + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + 
~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + } + r = jpeg_v2_5_start(adev); + + return r; +} + +static int vcn_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i, r; + + r = jpeg_v2_5_stop(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* wait for vcn idle */ + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + + vcn_v2_5_enable_clock_gating(adev); + + /* enable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), + 
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_dec_ring_get_rptr, + .get_wptr = vcn_v2_5_dec_ring_get_wptr, + .set_wptr = vcn_v2_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); +} + +/** + * vcn_v2_5_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return 
adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); + } +} + +/** + * vcn_v2_5_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_enc_ring_get_rptr, + .get_wptr = vcn_v2_5_enc_ring_get_wptr, + .set_wptr = vcn_v2_5_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v2_5_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v2_5_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = 
vcn_v2_5_jpeg_ring_get_rptr, + .get_wptr = vcn_v2_5_jpeg_ring_get_wptr, + .set_wptr = vcn_v2_5_jpeg_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ + 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ + .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, + .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v2_0_jpeg_ring_nop, + .insert_start = vcn_v2_0_jpeg_ring_insert_start, + .insert_end = vcn_v2_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.me = i; + DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + } +} + +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i, j; + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].me = j; + } + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); + } +} + +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; + adev->vcn.inst[i].ring_jpeg.me = i; + DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i); + } +} + +static bool vcn_v2_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +static int vcn_v2_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + if (ret) + return ret; + } + + return ret; +} + +static int vcn_v2_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + + if (enable) { + if (vcn_v2_5_is_idle(handle)) + return -EBUSY; + vcn_v2_5_enable_clock_gating(adev); + } else { + vcn_v2_5_disable_clock_gating(adev); + } + + return 0; +} + +static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if(state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v2_5_stop(adev); + else + ret = vcn_v2_5_start(adev); + + if(!ret) + adev->vcn.cur_state = state; + + return ret; +} + +static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); + break; + case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); + break; + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { + .set = vcn_v2_5_set_interrupt_state, + .process = vcn_v2_5_process_interrupt, +}; + +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { + .name = "vcn_v2_5", + .early_init = vcn_v2_5_early_init, + .late_init = NULL, + .sw_init = vcn_v2_5_sw_init, + .sw_fini = vcn_v2_5_sw_fini, + .hw_init = vcn_v2_5_hw_init, + .hw_fini = vcn_v2_5_hw_fini, + .suspend = vcn_v2_5_suspend, + .resume = vcn_v2_5_resume, + .is_idle = vcn_v2_5_is_idle, + .wait_for_idle = vcn_v2_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v2_5_set_clockgating_state, + .set_powergating_state = vcn_v2_5_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v2_5_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 2, + .minor = 5, + .rev = 0, + .funcs = &vcn_v2_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h new file mode 100644 index 000000000000..8d9c0800b8e0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_5_H__ +#define __VCN_V2_5_H__ + +extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; + +#endif /* __VCN_V2_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 6d1f804277f8..5cb7e231de5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "soc15.h" @@ -31,7 +33,7 @@ #include "soc15_common.h" #include "vega10_ih.h" - +#define MAX_REARM_RETRY 10 static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); @@ -48,14 +50,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } adev->irq.ih.enabled = true; if (adev->irq.ih1.ring_size) { ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } adev->irq.ih1.enabled = true; } @@ -63,7 +80,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } adev->irq.ih2.enabled = true; } } @@ -81,7 +106,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, 
RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); @@ -92,7 +125,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -104,7 +145,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); @@ -136,6 +186,25 @@ static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl return ih_rb_cntl; } +static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + /** * vega10_ih_irq_init - init and enable the interrupt ring * @@ -150,14 +219,14 @@ static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl static int vega10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih; + u32 ih_rb_cntl, ih_chicken; int ret = 0; - u32 ih_rb_cntl, ih_doorbell_rtpr; u32 tmp; /* disable irqs */ vega10_ih_disable_interrupts(adev); - adev->nbio_funcs->ih_control(adev); + adev->nbio.funcs->ih_control(adev); ih = &adev->irq.ih; /* Ring Buffer base. 
[39:8] of 40-bit address of the beginning of the ring buffer*/ @@ -165,10 +234,29 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + } else { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1); + } ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } + + if ((adev->asic_type == CHIP_ARCTURUS + && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) + || adev->asic_type == CHIP_RENOIR) + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, @@ -177,23 +265,11 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) upper_32_bits(ih->wptr_addr) & 0xFFFF); /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); - ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR); - if (adev->irq.ih.use_doorbell) { - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, - IH_DOORBELL_RPTR, OFFSET, - adev->irq.ih.doorbell_index); - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, - IH_DOORBELL_RPTR, - ENABLE, 1); - } else { - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, - IH_DOORBELL_RPTR, - ENABLE, 0); - } - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, + vega10_ih_doorbell_rptr(ih)); ih = &adev->irq.ih1; if (ih->ring_size) { @@ -203,11 +279,26 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + RB_FULL_DRAIN_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); + + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1, + vega10_ih_doorbell_rptr(ih)); } ih = &adev->irq.ih2; @@ -216,13 +307,25 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2, (ih->gpu_addr >> 40) & 0xff); - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + 
ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); + + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2, + vega10_ih_doorbell_rptr(ih)); } tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); @@ -365,6 +468,38 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, } /** + * vega10_ih_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * + */ +static void vega10_ih_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t reg_rptr = 0; + uint32_t v = 0; + uint32_t i = 0; + + if (ih == &adev->irq.ih) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); + else if (ih == &adev->irq.ih1) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); + else if (ih == &adev->irq.ih2) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); + else + return; + + /* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(reg_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + +/** * vega10_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer @@ -378,6 +513,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + vega10_ih_irq_rearm(adev, ih); } else if (ih == &adev->irq.ih) { WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); } else if (ih == &adev->irq.ih1) { @@ -449,20 +587,23 @@ static int vega10_ih_sw_init(void *handle) if (r) return r; - if (adev->asic_type == CHIP_VEGA10) { - r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); - if (r) - return r; - - r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); - if (r) - return r; - } - - /* TODO add doorbell for IH1 & IH2 as well */ adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); + if (r) + return r; + + adev->irq.ih2.use_doorbell = true; + adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + r = amdgpu_irq_init(adev); return r; @@ -534,10 +675,49 @@ static int vega10_ih_soft_reset(void *handle) return 0; } +static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); + field_val = enable ? 0 : 1; + /** + * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE + * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field. 
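 * On later ASICs (asic_type > CHIP_VEGA10) those two overrides are toggled
 * below together with the common DBUS/OSSSYS/SMN/DYN/REG clock overrides,
 * using the same field_val (0 when gating is enabled, 1 when it is disabled).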
+ */ + if (adev->asic_type > CHIP_VEGA10) { + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val); + } + + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); + } +} + static int vega10_ih_set_clockgating_state(void *handle, enum amd_clockgating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega10_ih_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE ? true : false); return 0; + } static int vega10_ih_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index a8e92638a2e8..6b52a539d51b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "vega10_ip_offset.h" int vega10_reg_base_init(struct amdgpu_device *adev) @@ -81,6 +80,10 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 0db84386252a..556f854e3551 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "vega20_ip_offset.h" int vega20_reg_base_init(struct amdgpu_device *adev) @@ -50,6 +49,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); } return 0; } @@ -85,6 +86,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = 
AMDGPU_VEGA20_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 5e5b42a0744a..78e5cdc0c058 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -20,8 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include <linux/pci.h> #include <linux/slab.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -57,7 +59,6 @@ #include "vid.h" #include "vi.h" -#include "vi_dpm.h" #include "gmc_v8_0.h" #include "gmc_v7_0.h" #include "gfx_v8_0.h" @@ -688,16 +689,50 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) return -EINVAL; } +int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) +{ + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { + *cap = false; + return -ENOENT; + } + + return pp_funcs->get_asic_baco_capability(pp_handle, cap); +} + +int smu7_asic_baco_reset(struct amdgpu_device *adev) +{ + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 1)) + return -EIO; + + /* exit BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 0)) + return -EIO; + + dev_info(adev->dev, "GPU BACO reset\n"); + + return 0; +} + /** - * vi_asic_reset - soft reset GPU + * vi_asic_pci_config_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Look up which blocks are hung and attempt - * to reset them. + * Use PCI Config method to reset the GPU. + * * Returns 0 for success. */ -static int vi_asic_reset(struct amdgpu_device *adev) +static int vi_asic_pci_config_reset(struct amdgpu_device *adev) { int r; @@ -710,6 +745,52 @@ static int vi_asic_reset(struct amdgpu_device *adev) return r; } +static enum amd_reset_method +vi_asic_reset_method(struct amdgpu_device *adev) +{ + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + smu7_asic_get_baco_capability(adev, &baco_reset); + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_LEGACY; +} + +/** + * vi_asic_reset - soft reset GPU + * + * @adev: amdgpu_device pointer + * + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. 
+ */
+static int vi_asic_reset(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
+		r = smu7_asic_baco_reset(adev);
+	else
+		r = vi_asic_pci_config_reset(adev);
+
+	return r;
+}
+
 static u32 vi_get_config_memsize(struct amdgpu_device *adev)
 {
 	return RREG32(mmCONFIG_MEMSIZE);
@@ -987,6 +1068,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
 	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
 }
 
+static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+	uint64_t nak_r, nak_g;
+
+	/* Get the number of NAKs received and generated */
+	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+	/* Add the total number of NAKs, i.e. the number of replays */
+	return (nak_r + nak_g);
+}
+
 static bool vi_need_reset_on_init(struct amdgpu_device *adev)
 {
 	u32 clock_cntl, pc;
@@ -1010,6 +1103,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.read_bios_from_rom = &vi_read_bios_from_rom,
 	.read_register = &vi_read_register,
 	.reset = &vi_asic_reset,
+	.reset_method = &vi_asic_reset_method,
 	.set_vga_state = &vi_vga_set_state,
 	.get_xclk = &vi_get_xclk,
 	.set_uvd_clocks = &vi_set_uvd_clocks,
@@ -1021,6 +1115,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.init_doorbell_index = &legacy_doorbell_index_init,
 	.get_pcie_usage = &vi_get_pcie_usage,
 	.need_reset_on_init = &vi_need_reset_on_init,
+	.get_pcie_replay_count = &vi_get_pcie_replay_count,
 };
 
 #define CZ_REV_BRISTOL(rev) \
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 8de0772f986c..40d4174913a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -31,4 +31,7 @@ void vi_srbm_select(struct amdgpu_device *adev,
 int vi_set_ip_blocks(struct amdgpu_device *adev);
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
 
+int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
+int smu7_asic_baco_reset(struct amdgpu_device *adev);
+
 #endif
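The IH_RB_CNTL writes in vega10_ih_irq_init() above all follow one recurring pattern: on an SR-IOV virtual function the register is PSP-protected, so the value is handed to the PSP through psp_reg_program() instead of being written directly with WREG32_SOC15(). The sketch below distills that pattern into a helper. The helper name and signature are illustrative only (the patch open-codes the sequence once per ring); psp_reg_program(), the PSP_REG_* ids and the register macros are the ones used in the hunks above.

/* Illustrative only -- the patch above open-codes this for all three rings. */
static int vega10_ih_write_rb_cntl(struct amdgpu_device *adev,
				   enum psp_reg_prog_id psp_id,
				   u32 reg_offset, u32 ih_rb_cntl)
{
	if (amdgpu_sriov_vf(adev)) {
		/* VF: register is PSP-protected, ask the PSP to program it */
		if (psp_reg_program(&adev->psp, psp_id, ih_rb_cntl)) {
			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
			return -ETIMEDOUT;
		}
	} else {
		/* bare metal: plain MMIO write */
		WREG32(reg_offset, ih_rb_cntl);
	}

	return 0;
}

For the primary ring this would be called as vega10_ih_write_rb_cntl(adev, PSP_REG_IH_RB_CNTL, SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl), and analogously with the RING1/RING2 ids.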
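vega10_ih_sw_init() above now gives all three IH rings their own doorbell: the adev->doorbell_index values are 64-bit slots, so each is shifted left by one to form the 32-bit doorbell offset consumed by WDOORBELL32(), and IH1/IH2 simply take the two slots following the primary ring. A condensed sketch of just that assignment; the helper name is hypothetical, the field accesses are exactly those in the hunk.

/* Hypothetical helper; the assignments mirror vega10_ih_sw_init() above. */
static void vega10_ih_assign_doorbells(struct amdgpu_device *adev)
{
	/* primary IH ring: 64-bit doorbell slot -> 32-bit doorbell offset */
	adev->irq.ih.use_doorbell = true;
	adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;

	/* IH1 and IH2 use the next two slots */
	adev->irq.ih1.use_doorbell = true;
	adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;

	adev->irq.ih2.use_doorbell = true;
	adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
}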
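On the VI side, vi_asic_reset() above no longer unconditionally performs a PCI config reset: vi_asic_reset_method() first asks the powerplay layer, via smu7_asic_get_baco_capability(), whether BACO is available and returns AMD_RESET_METHOD_BACO or AMD_RESET_METHOD_LEGACY accordingly. The sketch below shows how a caller could combine the two new amdgpu_asic_funcs hooks; the function is illustrative and not part of the patch.

/* Illustrative caller -- not part of the patch above. */
static int example_vi_gpu_reset(struct amdgpu_device *adev)
{
	enum amd_reset_method method = AMD_RESET_METHOD_LEGACY;

	/* .reset_method was added to vi_asic_funcs by this patch */
	if (adev->asic_funcs->reset_method)
		method = adev->asic_funcs->reset_method(adev);

	dev_info(adev->dev, "resetting GPU using %s\n",
		 method == AMD_RESET_METHOD_BACO ? "BACO" : "pci config");

	/* dispatches to vi_asic_reset(), which picks BACO or legacy itself */
	return adev->asic_funcs->reset(adev);
}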
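Finally, vi_get_pcie_replay_count() above approximates the PCIe replay count by summing the NAKs the device received and generated (ixPCIE_RX_NUM_NAK and ixPCIE_RX_NUM_NAK_GENERATED). A minimal sketch of reading it through the new .get_pcie_replay_count hook; the wrapper name is made up, and the sysfs plumbing amdgpu uses for this value lives outside these hunks.

/* Illustrative wrapper -- the real sysfs plumbing is outside these hunks. */
static u64 example_pcie_replay_count(struct amdgpu_device *adev)
{
	if (!adev->asic_funcs->get_pcie_replay_count)
		return 0;

	/* for VI this returns NAKs received + NAKs generated */
	return adev->asic_funcs->get_pcie_replay_count(adev);
}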