From a454eb57f851769230604f012ea68aff54b1e1a4 Mon Sep 17 00:00:00 2001 From: Sangyoun Lee Date: Mon, 28 Oct 2024 15:54:41 +0900 Subject: [PATCH] Update rknpu 0.9.8 --- drivers/rknpu/include/rknpu_drv.h | 10 +- drivers/rknpu/include/rknpu_gem.h | 11 +- drivers/rknpu/include/rknpu_ioctl.h | 35 +-- drivers/rknpu/include/rknpu_iommu.h | 16 +- drivers/rknpu/rknpu_devfreq.c | 13 + drivers/rknpu/rknpu_drv.c | 157 +++++++++-- drivers/rknpu/rknpu_gem.c | 387 +++++++++++++++++++++----- drivers/rknpu/rknpu_iommu.c | 405 ++++++++++++++++++++++++++-- drivers/rknpu/rknpu_job.c | 19 +- drivers/rknpu/rknpu_reset.c | 3 + 10 files changed, 917 insertions(+), 139 deletions(-) diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h index dbcb1b69a7446..90e66d68b727d 100644 --- a/drivers/rknpu/include/rknpu_drv.h +++ b/drivers/rknpu/include/rknpu_drv.h @@ -29,10 +29,10 @@ #define DRIVER_NAME "rknpu" #define DRIVER_DESC "RKNPU driver" -#define DRIVER_DATE "20240322" +#define DRIVER_DATE "20240828" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 9 -#define DRIVER_PATCHLEVEL 6 +#define DRIVER_PATCHLEVEL 8 #define LOG_TAG "RKNPU" @@ -54,6 +54,7 @@ #define LOG_DEV_ERROR(dev, fmt, args...) dev_err(dev, LOG_TAG ": " fmt, ##args) #define RKNPU_MAX_IOMMU_DOMAIN_NUM 16 +#define RKNPU_CACHE_SG_TABLE_NUM 2 struct rknpu_irqs_data { const char *name; @@ -84,6 +85,8 @@ struct rknpu_config { __u32 core_mask; const struct rknpu_amount_data *amount_top; const struct rknpu_amount_data *amount_core; + void (*state_init)(struct rknpu_device *rknpu_dev); + int (*cache_sgt_init)(struct rknpu_device *rknpu_dev); }; struct rknpu_timer { @@ -170,6 +173,8 @@ struct rknpu_device { int iommu_domain_num; int iommu_domain_id; struct iommu_domain *iommu_domains[RKNPU_MAX_IOMMU_DOMAIN_NUM]; + struct sg_table *cache_sgt[RKNPU_CACHE_SG_TABLE_NUM]; + atomic_t iommu_domain_refcount; }; struct rknpu_session { @@ -179,5 +184,6 @@ struct rknpu_session { int rknpu_power_get(struct rknpu_device *rknpu_dev); int rknpu_power_put(struct rknpu_device *rknpu_dev); +int rknpu_power_put_delay(struct rknpu_device *rknpu_dev); #endif /* __LINUX_RKNPU_DRV_H_ */ diff --git a/drivers/rknpu/include/rknpu_gem.h b/drivers/rknpu/include/rknpu_gem.h index 903c0fa9be350..17f922105786f 100644 --- a/drivers/rknpu/include/rknpu_gem.h +++ b/drivers/rknpu/include/rknpu_gem.h @@ -61,6 +61,8 @@ struct rknpu_gem_object { struct sg_table *sgt; struct drm_mm_node mm_node; int iommu_domain_id; + unsigned int core_mask; + unsigned int cache_with_sgt; }; enum rknpu_cache_type { @@ -69,11 +71,10 @@ enum rknpu_cache_type { }; /* create a new buffer with gem object */ -struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *dev, - unsigned int flags, - unsigned long size, - unsigned long sram_size, - int iommu_domain_id); +struct rknpu_gem_object * +rknpu_gem_object_create(struct drm_device *dev, unsigned int flags, + unsigned long size, unsigned long sram_size, + int iommu_domain_id, unsigned int core_mask); /* destroy a buffer with gem object */ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj); diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h index 76724f338f75e..98c544464d719 100644 --- a/drivers/rknpu/include/rknpu_ioctl.h +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -39,10 +39,10 @@ #define RKNPU_STR_HELPER(x) #x -#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \ - RKNPU_STR_HELPER(MAJOR) \ +#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \ + 
RKNPU_STR_HELPER(MAJOR) \ "." RKNPU_STR_HELPER(MINOR) "." RKNPU_STR_HELPER(PATCHLEVEL) -#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \ +#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \ (MAJOR * 10000 + MINOR * 100 + PATCHLEVEL) #define RKNPU_GET_DRV_VERSION_MAJOR(CODE) (CODE / 10000) #define RKNPU_GET_DRV_VERSION_MINOR(CODE) ((CODE % 10000) / 100) @@ -62,7 +62,7 @@ enum e_rknpu_mem_type { RKNPU_MEM_WRITE_COMBINE = 1 << 2, /* dma attr kernel mapping */ RKNPU_MEM_KERNEL_MAPPING = 1 << 3, - /* iommu mapping */ + /* IOMMU mapping */ RKNPU_MEM_IOMMU = 1 << 4, /* zero mapping */ RKNPU_MEM_ZEROING = 1 << 5, @@ -74,19 +74,22 @@ enum e_rknpu_mem_type { RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8, /* request NBUF */ RKNPU_MEM_TRY_ALLOC_NBUF = 1 << 9, + /* IOMMU limiting IOVA alignment */ + RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT = 1 << 10, RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE | RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING | RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING | RKNPU_MEM_SECURE | RKNPU_MEM_DMA32 | - RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF + RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF | + RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT }; /* sync mode definitions. */ enum e_rknpu_mem_sync_mode { RKNPU_MEM_SYNC_TO_DEVICE = 1 << 0, RKNPU_MEM_SYNC_FROM_DEVICE = 1 << 1, - RKNPU_MEM_SYNC_MASK = - RKNPU_MEM_SYNC_TO_DEVICE | RKNPU_MEM_SYNC_FROM_DEVICE + RKNPU_MEM_SYNC_MASK = RKNPU_MEM_SYNC_TO_DEVICE | + RKNPU_MEM_SYNC_FROM_DEVICE }; /* job mode definitions. */ @@ -155,7 +158,7 @@ struct rknpu_mem_create { __u64 dma_addr; __u64 sram_size; __s32 iommu_domain_id; - __u32 reserved; + __u32 core_mask; }; /** @@ -302,25 +305,25 @@ struct rknpu_action { #include -#define DRM_IOCTL_RKNPU_ACTION \ +#define DRM_IOCTL_RKNPU_ACTION \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_ACTION, struct rknpu_action) -#define DRM_IOCTL_RKNPU_SUBMIT \ +#define DRM_IOCTL_RKNPU_SUBMIT \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_SUBMIT, struct rknpu_submit) -#define DRM_IOCTL_RKNPU_MEM_CREATE \ +#define DRM_IOCTL_RKNPU_MEM_CREATE \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_CREATE, struct rknpu_mem_create) -#define DRM_IOCTL_RKNPU_MEM_MAP \ +#define DRM_IOCTL_RKNPU_MEM_MAP \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_MAP, struct rknpu_mem_map) -#define DRM_IOCTL_RKNPU_MEM_DESTROY \ +#define DRM_IOCTL_RKNPU_MEM_DESTROY \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) -#define DRM_IOCTL_RKNPU_MEM_SYNC \ +#define DRM_IOCTL_RKNPU_MEM_SYNC \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_SYNC, struct rknpu_mem_sync) #define IOCTL_RKNPU_ACTION RKNPU_IOWR(RKNPU_ACTION, struct rknpu_action) #define IOCTL_RKNPU_SUBMIT RKNPU_IOWR(RKNPU_SUBMIT, struct rknpu_submit) -#define IOCTL_RKNPU_MEM_CREATE \ +#define IOCTL_RKNPU_MEM_CREATE \ RKNPU_IOWR(RKNPU_MEM_CREATE, struct rknpu_mem_create) #define IOCTL_RKNPU_MEM_MAP RKNPU_IOWR(RKNPU_MEM_MAP, struct rknpu_mem_map) -#define IOCTL_RKNPU_MEM_DESTROY \ +#define IOCTL_RKNPU_MEM_DESTROY \ RKNPU_IOWR(RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) #define IOCTL_RKNPU_MEM_SYNC RKNPU_IOWR(RKNPU_MEM_SYNC, struct rknpu_mem_sync) diff --git a/drivers/rknpu/include/rknpu_iommu.h b/drivers/rknpu/include/rknpu_iommu.h index 43d36db91e2db..40ea58e282d88 100644 --- a/drivers/rknpu/include/rknpu_iommu.h +++ b/drivers/rknpu/include/rknpu_iommu.h @@ -32,14 +32,26 @@ struct rknpu_iommu_dma_cookie { }; dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, - u64 dma_limit, struct device *dev); + u64 dma_limit, struct device *dev, + bool size_aligned); 
void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size); + dma_addr_t iova, size_t size, bool size_aligned); + +int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned); + +void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned); int rknpu_iommu_init_domain(struct rknpu_device *rknpu_dev); int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id); void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev); +int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev, + int domain_id); +int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev); #if KERNEL_VERSION(5, 10, 0) < LINUX_VERSION_CODE int iommu_get_dma_cookie(struct iommu_domain *domain); diff --git a/drivers/rknpu/rknpu_devfreq.c b/drivers/rknpu/rknpu_devfreq.c index 1fe98d567eb54..45efcca6490fc 100644 --- a/drivers/rknpu/rknpu_devfreq.c +++ b/drivers/rknpu/rknpu_devfreq.c @@ -234,6 +234,7 @@ static int npu_opp_config_clks(struct device *dev, struct opp_table *opp_table, static const struct rockchip_opp_data rk3576_npu_opp_data = { .set_read_margin = rk3576_npu_set_read_margin, + /* .set_soc_info = rockchip_opp_set_low_length, */ #if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE .config_regulators = npu_opp_config_regulators, .config_clks = npu_opp_config_clks, @@ -395,7 +396,11 @@ int rknpu_devfreq_init(struct rknpu_device *rknpu_dev) err_remove_governor: devfreq_remove_governor(&devfreq_rknpu_ondemand); err_uinit_table: +#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE rockchip_uninit_opp_table(dev, info); +#else + dev_pm_opp_of_remove_table(dev); +#endif return ret; } @@ -729,7 +734,11 @@ int rknpu_devfreq_init(struct rknpu_device *rknpu_dev) err_remove_governor: devfreq_remove_governor(&devfreq_rknpu_ondemand); err_remove_table: +#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE rockchip_uninit_opp_table(dev, &rknpu_dev->opp_info); +#else + dev_pm_opp_of_remove_table(dev); +#endif rknpu_dev->devfreq = NULL; @@ -790,6 +799,10 @@ void rknpu_devfreq_remove(struct rknpu_device *rknpu_dev) } if (rknpu_dev->devfreq) devfreq_remove_governor(&devfreq_rknpu_ondemand); +#if KERNEL_VERSION(5, 10, 198) <= LINUX_VERSION_CODE rockchip_uninit_opp_table(rknpu_dev->dev, &rknpu_dev->opp_info); +#else + dev_pm_opp_of_remove_table(rknpu_dev->dev); +#endif } EXPORT_SYMBOL(rknpu_devfreq_remove); diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c index f4f483845b7d8..e8afa9778c14c 100644 --- a/drivers/rknpu/rknpu_drv.c +++ b/drivers/rknpu/rknpu_drv.c @@ -108,6 +108,57 @@ static const struct rknpu_amount_data rknpu_core_amount = { .offset_wt_rd = 0x243c, }; +static void rk3576_state_init(struct rknpu_device *rknpu_dev) +{ + void __iomem *rknpu_core_base = rknpu_dev->base[0]; + + writel(0x1, rknpu_core_base + 0x10); + writel(0, rknpu_core_base + 0x1004); + writel(0x80000000, rknpu_core_base + 0x1024); + writel(1, rknpu_core_base + 0x1004); + writel(0x80000000, rknpu_core_base + 0x1024); + writel(0x1e, rknpu_core_base + 0x1004); +} + +static int rk3576_cache_sgt_init(struct rknpu_device *rknpu_dev) +{ + struct sg_table *sgt = NULL; + struct scatterlist *sgl = NULL; + uint64_t block_size_kb[4] = { 448, 64, 448, 64 }; + uint64_t block_offset_kb[4] = { 0, 896, 448, 960 }; + int core_num = rknpu_dev->config->num_irqs; + int ret = 0, i = 0, j = 0; + + for (i = 0; i < core_num; i++) { + sgt = 
kzalloc(sizeof(struct sg_table), GFP_KERNEL); + if (!sgt) + goto out_free_table; + ret = sg_alloc_table(sgt, core_num, GFP_KERNEL); + if (ret) { + kfree(sgt); + goto out_free_table; + } + rknpu_dev->cache_sgt[i] = sgt; + for_each_sgtable_sg(sgt, sgl, j) { + sg_set_page(sgl, NULL, + block_size_kb[i * core_num + j] * 1024, + block_offset_kb[i * core_num + j] * 1024); + } + } + return 0; + +out_free_table: + for (i = 0; i < core_num; i++) { + if (rknpu_dev->cache_sgt[i]) { + sg_free_table(rknpu_dev->cache_sgt[i]); + kfree(rknpu_dev->cache_sgt[i]); + rknpu_dev->cache_sgt[i] = NULL; + } + } + + return ret; +} + static const struct rknpu_config rk356x_rknpu_config = { .bw_priority_addr = 0xfe180008, .bw_priority_length = 0x10, @@ -125,6 +176,8 @@ static const struct rknpu_config rk356x_rknpu_config = { .core_mask = 0x1, .amount_top = &rknpu_old_top_amount, .amount_core = NULL, + .state_init = NULL, + .cache_sgt_init = NULL, }; static const struct rknpu_config rk3588_rknpu_config = { @@ -144,6 +197,8 @@ static const struct rknpu_config rk3588_rknpu_config = { .core_mask = 0x7, .amount_top = NULL, .amount_core = NULL, + .state_init = NULL, + .cache_sgt_init = NULL, }; static const struct rknpu_config rk3583_rknpu_config = { @@ -163,6 +218,8 @@ static const struct rknpu_config rk3583_rknpu_config = { .core_mask = 0x3, .amount_top = NULL, .amount_core = NULL, + .state_init = NULL, + .cache_sgt_init = NULL, }; static const struct rknpu_config rv1106_rknpu_config = { @@ -182,6 +239,8 @@ static const struct rknpu_config rv1106_rknpu_config = { .core_mask = 0x1, .amount_top = &rknpu_old_top_amount, .amount_core = NULL, + .state_init = NULL, + .cache_sgt_init = NULL, }; static const struct rknpu_config rk3562_rknpu_config = { @@ -201,6 +260,8 @@ static const struct rknpu_config rk3562_rknpu_config = { .core_mask = 0x1, .amount_top = &rknpu_old_top_amount, .amount_core = NULL, + .state_init = NULL, + .cache_sgt_init = NULL, }; static const struct rknpu_config rk3576_rknpu_config = { @@ -220,6 +281,8 @@ static const struct rknpu_config rk3576_rknpu_config = { .core_mask = 0x3, .amount_top = &rknpu_top_amount, .amount_core = &rknpu_core_amount, + .state_init = rk3576_state_init, + .cache_sgt_init = rk3576_cache_sgt_init, }; /* driver probe and init */ @@ -263,13 +326,20 @@ static int rknpu_power_off(struct rknpu_device *rknpu_dev); static void rknpu_power_off_delay_work(struct work_struct *power_off_work) { + int ret = 0; struct rknpu_device *rknpu_dev = container_of(to_delayed_work(power_off_work), struct rknpu_device, power_off_work); mutex_lock(&rknpu_dev->power_lock); - if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) - rknpu_power_off(rknpu_dev); + if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) { + ret = rknpu_power_off(rknpu_dev); + if (ret) + atomic_inc(&rknpu_dev->power_refcount); + } mutex_unlock(&rknpu_dev->power_lock); + + if (ret) + rknpu_power_put_delay(rknpu_dev); } int rknpu_power_get(struct rknpu_device *rknpu_dev) @@ -289,14 +359,20 @@ int rknpu_power_put(struct rknpu_device *rknpu_dev) int ret = 0; mutex_lock(&rknpu_dev->power_lock); - if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) + if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) { ret = rknpu_power_off(rknpu_dev); + if (ret) + atomic_inc(&rknpu_dev->power_refcount); + } mutex_unlock(&rknpu_dev->power_lock); + if (ret) + rknpu_power_put_delay(rknpu_dev); + return ret; } -static int rknpu_power_put_delay(struct rknpu_device *rknpu_dev) +int rknpu_power_put_delay(struct rknpu_device 
*rknpu_dev) { if (rknpu_dev->power_put_delay == 0) return rknpu_power_put(rknpu_dev); @@ -409,8 +485,11 @@ static int rknpu_action(struct rknpu_device *rknpu_dev, ret = 0; break; case RKNPU_SET_IOMMU_DOMAIN_ID: { - ret = rknpu_iommu_switch_domain(rknpu_dev, - *(int32_t *)&args->value); + ret = rknpu_iommu_domain_get_and_switch( + rknpu_dev, *(int32_t *)&args->value); + if (ret) + break; + rknpu_iommu_domain_put(rknpu_dev); break; } default: @@ -568,16 +647,16 @@ static int rknpu_action_ioctl(struct drm_device *dev, void *data, return rknpu_action(rknpu_dev, (struct rknpu_action *)data); } -#define RKNPU_IOCTL(func) \ - static int __##func(struct drm_device *dev, void *data, \ - struct drm_file *file_priv) \ - { \ - struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); \ - int ret = -EINVAL; \ - rknpu_power_get(rknpu_dev); \ - ret = func(dev, data, file_priv); \ - rknpu_power_put_delay(rknpu_dev); \ - return ret; \ +#define RKNPU_IOCTL(func) \ + static int __##func(struct drm_device *dev, void *data, \ + struct drm_file *file_priv) \ + { \ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); \ + int ret = -EINVAL; \ + rknpu_power_get(rknpu_dev); \ + ret = func(dev, data, file_priv); \ + rknpu_power_put_delay(rknpu_dev); \ + return ret; \ } RKNPU_IOCTL(rknpu_action_ioctl); @@ -916,6 +995,9 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) ret); } + if (rknpu_dev->config->state_init != NULL) + rknpu_dev->config->state_init(rknpu_dev); + out: #ifndef FPGA_PLATFORM rknpu_devfreq_unlock(rknpu_dev); @@ -1207,6 +1289,7 @@ static int rknpu_probe(struct platform_device *pdev) rknpu_dev->config = config; rknpu_dev->dev = dev; + dev_set_drvdata(dev, rknpu_dev); rknpu_dev->iommu_en = rknpu_is_iommu_enable(dev); if (rknpu_dev->iommu_en) { @@ -1405,8 +1488,11 @@ static int rknpu_probe(struct platform_device *pdev) } if (IS_ENABLED(CONFIG_NO_GKI) && rknpu_dev->iommu_en && - rknpu_dev->config->nbuf_size > 0) + rknpu_dev->config->nbuf_size > 0) { rknpu_find_nbuf_resource(rknpu_dev); + if (rknpu_dev->config->cache_sgt_init != NULL) + rknpu_dev->config->cache_sgt_init(rknpu_dev); + } if (rknpu_dev->iommu_en) rknpu_iommu_init_domain(rknpu_dev); @@ -1414,6 +1500,7 @@ static int rknpu_probe(struct platform_device *pdev) rknpu_power_off(rknpu_dev); atomic_set(&rknpu_dev->power_refcount, 0); atomic_set(&rknpu_dev->cmdline_power_refcount, 0); + atomic_set(&rknpu_dev->iommu_domain_refcount, 0); rknpu_debugger_init(rknpu_dev); rknpu_init_timer(rknpu_dev); @@ -1450,6 +1537,16 @@ static int rknpu_remove(struct platform_device *pdev) rknpu_debugger_remove(rknpu_dev); rknpu_cancel_timer(rknpu_dev); + if (rknpu_dev->config->cache_sgt_init != NULL) { + for (i = 0; i < RKNPU_CACHE_SG_TABLE_NUM; i++) { + if (rknpu_dev->cache_sgt[i]) { + sg_free_table(rknpu_dev->cache_sgt[i]); + kfree(rknpu_dev->cache_sgt[i]); + rknpu_dev->cache_sgt[i] = NULL; + } + } + } + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { WARN_ON(rknpu_dev->subcore_datas[i].job); WARN_ON(!list_empty(&rknpu_dev->subcore_datas[i].todo_list)); @@ -1494,6 +1591,26 @@ static int rknpu_remove(struct platform_device *pdev) } #ifndef FPGA_PLATFORM +#ifdef CONFIG_PM_SLEEP +static int rknpu_suspend(struct device *dev) +{ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); + + rknpu_power_get(rknpu_dev); + + return pm_runtime_force_suspend(dev); +} + +static int rknpu_resume(struct device *dev) +{ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); + + rknpu_power_put_delay(rknpu_dev); + + return pm_runtime_force_resume(dev); +} 
+#endif + static int rknpu_runtime_suspend(struct device *dev) { return rknpu_devfreq_runtime_suspend(dev); @@ -1505,10 +1622,8 @@ static int rknpu_runtime_resume(struct device *dev) } static const struct dev_pm_ops rknpu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) - SET_RUNTIME_PM_OPS(rknpu_runtime_suspend, rknpu_runtime_resume, - NULL) + SET_SYSTEM_SLEEP_PM_OPS(rknpu_suspend, rknpu_resume) SET_RUNTIME_PM_OPS( + rknpu_runtime_suspend, rknpu_runtime_resume, NULL) }; #endif diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c index 4cabe13a54c0a..a5c5354ed85ce 100644 --- a/drivers/rknpu/rknpu_gem.c +++ b/drivers/rknpu/rknpu_gem.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -37,6 +38,8 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) dma_addr_t dma_addr = 0; dma_addr_t phys = 0; int ret = -EINVAL, i = 0; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); if (IS_ERR(rknpu_obj->pages)) { @@ -60,8 +63,9 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) goto put_pages; } - ret = dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, - DMA_BIDIRECTIONAL); + ret = rknpu_iommu_dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL, + iova_aligned); if (ret == 0) { ret = -EFAULT; LOG_DEV_ERROR(drm->dev, "%s: dma map %zu fail\n", __func__, @@ -95,8 +99,9 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) return 0; unmap_sg: - dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, - DMA_BIDIRECTIONAL); + rknpu_iommu_dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL, + iova_aligned); free_sgt: sg_free_table(rknpu_obj->sgt); @@ -111,6 +116,8 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) { struct drm_device *drm = rknpu_obj->base.dev; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) { vunmap(rknpu_obj->kv_addr); @@ -118,8 +125,9 @@ static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) } if (rknpu_obj->sgt != NULL) { - dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, - rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL); + rknpu_iommu_dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL, iova_aligned); sg_free_table(rknpu_obj->sgt); kfree(rknpu_obj->sgt); } @@ -198,9 +206,9 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj) return -ENOMEM; } - rknpu_obj->cookie = - dma_alloc_attrs(drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr, - gfp_mask, rknpu_obj->dma_attrs); + rknpu_obj->cookie = dma_alloc_attrs(drm->dev, rknpu_obj->size, + &rknpu_obj->dma_addr, gfp_mask, + rknpu_obj->dma_attrs); if (!rknpu_obj->cookie) { /* * when RKNPU_MEM_CONTIGUOUS and IOMMU is available @@ -214,10 +222,9 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj) rknpu_obj->size); rknpu_obj->dma_attrs &= ~DMA_ATTR_FORCE_CONTIGUOUS; rknpu_obj->flags |= RKNPU_MEM_NON_CONTIGUOUS; - rknpu_obj->cookie = - dma_alloc_attrs(drm->dev, rknpu_obj->size, - &rknpu_obj->dma_addr, gfp_mask, - rknpu_obj->dma_attrs); + rknpu_obj->cookie = dma_alloc_attrs( + drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr, + gfp_mask, rknpu_obj->dma_attrs); if (!rknpu_obj->cookie) { LOG_DEV_ERROR( 
drm->dev, @@ -412,6 +419,50 @@ static void rknpu_gem_release(struct rknpu_gem_object *rknpu_obj) kfree(rknpu_obj); } +static int rknpu_iommu_map_with_cache_sgt(struct iommu_domain *domain, + struct rknpu_device *rknpu_dev, + struct rknpu_gem_object *rknpu_obj, + unsigned long cache_size) +{ + phys_addr_t cache_start = 0; + unsigned long iova_start = rknpu_obj->iova_start; + struct scatterlist *s = NULL; + unsigned long length = cache_size; + unsigned long size = 0; + int i = 0; + int ret = 0; + int index = 0; + + switch (rknpu_obj->core_mask) { + case RKNPU_CORE0_MASK: + index = 0; + break; + case RKNPU_CORE1_MASK: + index = 1; + break; + default: + break; + } + + for_each_sgtable_sg(rknpu_dev->cache_sgt[index], s, i) { + cache_start = rknpu_dev->nbuf_start + s->offset; + size = length < s->length ? length : s->length; + ret = iommu_map(domain, iova_start, cache_start, size, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + LOG_ERROR("cache iommu_map error: %d\n", ret); + return ret; + } + length -= size; + iova_start += size; + + if (length == 0) + break; + } + + return ret; +} + static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, enum rknpu_cache_type cache_type) { @@ -429,6 +480,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, phys_addr_t cache_start = 0; unsigned long cache_offset = 0; unsigned long cache_size = 0; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); switch (cache_type) { case RKNPU_CACHE_SRAM: @@ -458,7 +511,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, iovad = &cookie->iovad; rknpu_obj->iova_size = iova_align(iovad, cache_size + rknpu_obj->size); rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova( - domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev); + domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev, + iova_aligned); if (!rknpu_obj->iova_start) { LOG_ERROR("iommu_dma_alloc_iova failed\n"); return -ENOMEM; @@ -491,9 +545,14 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, * |<- - - - - - - iova_size - - - - - - ->| * */ - ret = iommu_map(domain, rknpu_obj->iova_start, - cache_start + cache_offset, cache_size, - IOMMU_READ | IOMMU_WRITE); + if (!rknpu_obj->cache_with_sgt) + ret = iommu_map(domain, rknpu_obj->iova_start, + cache_start + cache_offset, cache_size, + IOMMU_READ | IOMMU_WRITE); + else + ret = rknpu_iommu_map_with_cache_sgt(domain, rknpu_dev, + rknpu_obj, cache_size); + if (ret) { LOG_ERROR("cache iommu_map error: %d\n", ret); goto free_iova; @@ -567,7 +626,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, free_iova: rknpu_iommu_dma_free_iova((void *)domain->iova_cookie, - rknpu_obj->iova_start, rknpu_obj->iova_size); + rknpu_obj->iova_start, rknpu_obj->iova_size, + iova_aligned); return ret; } @@ -579,6 +639,8 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj, struct rknpu_device *rknpu_dev = drm->dev_private; struct iommu_domain *domain = NULL; unsigned long cache_size = 0; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); switch (cache_type) { case RKNPU_CACHE_SRAM: @@ -600,7 +662,7 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj, rknpu_obj->size); rknpu_iommu_dma_free_iova((void *)domain->iova_cookie, rknpu_obj->iova_start, - rknpu_obj->iova_size); + rknpu_obj->iova_size, iova_aligned); } if (rknpu_obj->pages) @@ -613,11 +675,10 @@ static void 
rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj, } } -struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, - unsigned int flags, - unsigned long size, - unsigned long sram_size, - int iommu_domain_id) +struct rknpu_gem_object * +rknpu_gem_object_create(struct drm_device *drm, unsigned int flags, + unsigned long size, unsigned long sram_size, + int iommu_domain_id, unsigned int core_mask) { struct rknpu_device *rknpu_dev = drm->dev_private; struct rknpu_gem_object *rknpu_obj = NULL; @@ -635,8 +696,13 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, if (IS_ERR(rknpu_obj)) return rknpu_obj; - if (!rknpu_iommu_switch_domain(rknpu_dev, iommu_domain_id)) - rknpu_obj->iommu_domain_id = iommu_domain_id; + if (rknpu_iommu_domain_get_and_switch(rknpu_dev, iommu_domain_id)) { + LOG_DEV_ERROR(rknpu_dev->dev, "%s error\n", __func__); + rknpu_gem_release(rknpu_obj); + return ERR_PTR(-EINVAL); + } + + rknpu_obj->iommu_domain_id = iommu_domain_id; if (!rknpu_dev->iommu_en && (flags & RKNPU_MEM_NON_CONTIGUOUS)) { /* @@ -648,6 +714,9 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, "non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n"); } + /* set memory type and cache attribute from user side. */ + rknpu_obj->flags = flags; + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && (flags & RKNPU_MEM_TRY_ALLOC_SRAM) && rknpu_dev->sram_size > 0) { size_t sram_free_size = 0; @@ -656,8 +725,7 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, if (sram_size != 0) sram_size = round_up(sram_size, PAGE_SIZE); - /* set memory type and cache attribute from user side. */ - rknpu_obj->flags = flags; + rknpu_obj->cache_with_sgt = 0; sram_free_size = rknpu_dev->sram_mm->free_chunks * rknpu_dev->sram_mm->chunk_size; @@ -692,12 +760,22 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, } else if (IS_ENABLED(CONFIG_NO_GKI) && (flags & RKNPU_MEM_TRY_ALLOC_NBUF) && rknpu_dev->nbuf_size > 0) { - size_t nbuf_size = remain_ddr_size <= rknpu_dev->nbuf_size ? - remain_ddr_size : - rknpu_dev->nbuf_size; + size_t nbuf_size = rknpu_dev->nbuf_size; + + rknpu_obj->cache_with_sgt = 0; + + if (core_mask == RKNPU_CORE_AUTO_MASK || + core_mask == RKNPU_CORE0_MASK || + core_mask == RKNPU_CORE1_MASK) { + if (rknpu_dev->cache_sgt[0]) + rknpu_obj->cache_with_sgt = 1; + nbuf_size = rknpu_dev->nbuf_size / + rknpu_dev->config->num_irqs; + } - /* set memory type and cache attribute from user side. */ - rknpu_obj->flags = flags; + rknpu_obj->core_mask = core_mask; + nbuf_size = remain_ddr_size <= nbuf_size ? remain_ddr_size : + nbuf_size; if (nbuf_size > 0) { rknpu_obj->nbuf_size = nbuf_size; @@ -711,14 +789,13 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, } if (remain_ddr_size > 0) { - /* set memory type and cache attribute from user side. 
*/ - rknpu_obj->flags = flags; - ret = rknpu_gem_alloc_buf(rknpu_obj); if (ret < 0) goto gem_release; } + rknpu_iommu_domain_put(rknpu_dev); + LOG_DEBUG( "created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, nbuf size: %lu, attrs: %#lx, flags: %#x, iommu domain id: %d\n", &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, @@ -736,6 +813,8 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, gem_release: rknpu_gem_release(rknpu_obj); + rknpu_iommu_domain_put(rknpu_dev); + return ERR_PTR(ret); } @@ -743,13 +822,26 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) { struct drm_gem_object *obj = &rknpu_obj->base; struct rknpu_device *rknpu_dev = obj->dev->dev_private; + int wait_count = 0; + int ret = -EINVAL; LOG_DEBUG( "destroy dma addr: %pad, cookie: %p, size: %lu, attrs: %#lx, flags: %#x, handle count: %d\n", &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, rknpu_obj->dma_attrs, rknpu_obj->flags, obj->handle_count); - rknpu_iommu_switch_domain(rknpu_dev, rknpu_obj->iommu_domain_id); + do { + ret = rknpu_iommu_domain_get_and_switch( + rknpu_dev, rknpu_obj->iommu_domain_id); + + if (ret && ++wait_count >= 3) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "failed to destroy dma addr: %pad, size: %lu\n", + &rknpu_obj->dma_addr, rknpu_obj->size); + return; + } + } while (ret); /* * do not release memory region from exporter. @@ -778,6 +870,7 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) } rknpu_gem_release(rknpu_obj); + rknpu_iommu_domain_put(rknpu_dev); } int rknpu_gem_create_ioctl(struct drm_device *drm, void *data, @@ -791,7 +884,8 @@ int rknpu_gem_create_ioctl(struct drm_device *drm, void *data, if (!rknpu_obj) { rknpu_obj = rknpu_gem_object_create(drm, args->flags, args->size, args->sram_size, - args->iommu_domain_id); + args->iommu_domain_id, + args->core_mask); if (IS_ERR(rknpu_obj)) return PTR_ERR(rknpu_obj); @@ -833,16 +927,29 @@ int rknpu_gem_destroy_ioctl(struct drm_device *drm, void *data, struct rknpu_device *rknpu_dev = drm->dev_private; struct rknpu_gem_object *rknpu_obj = NULL; struct rknpu_mem_destroy *args = data; + int ret = 0; + int wait_count = 0; rknpu_obj = rknpu_gem_object_find(file_priv, args->handle); if (!rknpu_obj) return -EINVAL; - rknpu_iommu_switch_domain(rknpu_dev, rknpu_obj->iommu_domain_id); + do { + ret = rknpu_iommu_domain_get_and_switch( + rknpu_dev, rknpu_obj->iommu_domain_id); + + if (ret && ++wait_count >= 3) { + LOG_DEV_ERROR(rknpu_dev->dev, + "failed to destroy memory\n"); + return ret; + } + } while (ret); - // rknpu_gem_object_put(&rknpu_obj->base); + ret = rknpu_gem_handle_destroy(file_priv, args->handle); - return rknpu_gem_handle_destroy(file_priv, args->handle); + rknpu_iommu_domain_put(rknpu_dev); + + return ret; } #if RKNPU_GEM_ALLOC_FROM_PAGES @@ -900,6 +1007,53 @@ static int rknpu_gem_mmap_pages(struct rknpu_gem_object *rknpu_obj, } #endif +static int rknpu_remap_pfn_with_cache_sgt(struct rknpu_device *rknpu_dev, + struct rknpu_gem_object *rknpu_obj, + struct vm_area_struct *vma, + unsigned long cache_size) +{ + phys_addr_t cache_start = 0; + unsigned long vm_start = vma->vm_start; + struct scatterlist *s = NULL; + unsigned long length = cache_size; + unsigned long size = 0; + int i = 0; + int ret = 0; + int index = 0; + + switch (rknpu_obj->core_mask) { + case RKNPU_CORE0_MASK: + index = 0; + break; + case RKNPU_CORE1_MASK: + index = 1; + break; + default: + break; + } + + for_each_sgtable_sg(rknpu_dev->cache_sgt[index], s, i) { + cache_start = 
rknpu_dev->nbuf_start + s->offset; + size = length < s->length ? length : s->length; + + vma->vm_pgoff = __phys_to_pfn(cache_start); + ret = remap_pfn_range(vma, vm_start, vma->vm_pgoff, size, + vma->vm_page_prot); + + if (ret) { + LOG_ERROR("cache remap_pfn_range error: %d\n", ret); + return ret; + } + length -= size; + vm_start += size; + + if (length == 0) + break; + } + + return ret; +} + static int rknpu_gem_mmap_cache(struct rknpu_gem_object *rknpu_obj, struct vm_area_struct *vma, enum rknpu_cache_type cache_type) @@ -945,10 +1099,16 @@ static int rknpu_gem_mmap_cache(struct rknpu_gem_object *rknpu_obj, * NOTE: This conversion carries a risk because the resulting PFN is not a true * page frame number and may not be valid or usable in all contexts. */ - vma->vm_pgoff = __phys_to_pfn(cache_start + cache_offset); - ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, cache_size, - vma->vm_page_prot); + if (!rknpu_obj->cache_with_sgt) { + vma->vm_pgoff = __phys_to_pfn(cache_start + cache_offset); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + cache_size, vma->vm_page_prot); + } else + ret = rknpu_remap_pfn_with_cache_sgt(rknpu_dev, rknpu_obj, vma, + cache_size); + if (ret) return -EAGAIN; @@ -1019,7 +1179,11 @@ static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj, void rknpu_gem_free_object(struct drm_gem_object *obj) { + struct rknpu_device *rknpu_dev = obj->dev->dev_private; + + rknpu_power_get(rknpu_dev); rknpu_gem_object_destroy(to_rknpu_obj(obj)); + rknpu_power_put_delay(rknpu_dev); } int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *drm, @@ -1043,7 +1207,7 @@ int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *drm, else flags = RKNPU_MEM_CONTIGUOUS | RKNPU_MEM_WRITE_COMBINE; - rknpu_obj = rknpu_gem_object_create(drm, flags, args->size, 0, 0); + rknpu_obj = rknpu_gem_object_create(drm, flags, args->size, 0, 0, 0); if (IS_ERR(rknpu_obj)) { LOG_DEV_ERROR(drm->dev, "gem object allocate failed.\n"); return PTR_ERR(rknpu_obj); @@ -1366,16 +1530,78 @@ int rknpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) return rknpu_gem_mmap_obj(obj, vma); } +static int rknpu_cache_sync_with_sg(struct rknpu_device *rknpu_dev, + struct rknpu_gem_object *rknpu_obj, + unsigned long *length, + unsigned long *offset, uint32_t dir) +{ + struct scatterlist *s = NULL; + int i = 0; + int index = 0; + void __iomem *cache_start = 0; + unsigned long cache_length = 0; + + switch (rknpu_obj->core_mask) { + case RKNPU_CORE0_MASK: + index = 0; + break; + case RKNPU_CORE1_MASK: + index = 1; + break; + default: + break; + } + + for_each_sgtable_sg(rknpu_dev->cache_sgt[index], s, i) { + cache_start = rknpu_dev->nbuf_base_io + s->offset; + cache_length = (*offset + *length) <= s->length ? + *length : + s->length - *offset; + if (dir & RKNPU_MEM_SYNC_TO_DEVICE) { +#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE + __dma_map_area(cache_start, cache_length, + DMA_TO_DEVICE); +#else + dcache_clean_poc((unsigned long)cache_start, + (unsigned long)cache_start + + cache_length); +#endif + } + + if (dir & RKNPU_MEM_SYNC_FROM_DEVICE) { +#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE + __dma_unmap_area(cache_start, cache_length, + DMA_FROM_DEVICE); +#else + dcache_inval_poc((unsigned long)cache_start, + (unsigned long)cache_start + + cache_length); +#endif + } + + *length = (*offset + *length) <= s->length ? 
+ 0 : + *length - cache_length; + *offset = 0; + + if (*length == 0) + break; + } + + return 0; +} + static int rknpu_cache_sync(struct rknpu_gem_object *rknpu_obj, unsigned long *length, unsigned long *offset, - enum rknpu_cache_type cache_type) + enum rknpu_cache_type cache_type, uint32_t dir) { -#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE struct drm_gem_object *obj = &rknpu_obj->base; struct rknpu_device *rknpu_dev = obj->dev->dev_private; void __iomem *cache_base_io = NULL; unsigned long cache_offset = 0; unsigned long cache_size = 0; + void __iomem *cache_start = 0; + unsigned long cache_length = 0; switch (cache_type) { case RKNPU_CACHE_SRAM: @@ -1394,26 +1620,46 @@ static int rknpu_cache_sync(struct rknpu_gem_object *rknpu_obj, return -EINVAL; } - if ((*offset + *length) <= cache_size) { - __dma_map_area(cache_base_io + *offset + cache_offset, *length, - DMA_TO_DEVICE); - __dma_unmap_area(cache_base_io + *offset + cache_offset, - *length, DMA_FROM_DEVICE); - *length = 0; - *offset = 0; - } else if (*offset >= cache_size) { + if (*offset >= cache_size) { *offset -= cache_size; - } else { - unsigned long cache_length = cache_size - *offset; + return 0; + } - __dma_map_area(cache_base_io + *offset + cache_offset, - cache_length, DMA_TO_DEVICE); - __dma_unmap_area(cache_base_io + *offset + cache_offset, - cache_length, DMA_FROM_DEVICE); - *length -= cache_length; + if (!rknpu_obj->cache_with_sgt) { + cache_start = cache_base_io + cache_offset; + cache_length = (*offset + *length) <= cache_size ? + *length : + cache_size - *offset; + if (dir & RKNPU_MEM_SYNC_TO_DEVICE) { +#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE + __dma_map_area(cache_start, cache_length, + DMA_TO_DEVICE); +#else + dcache_clean_poc((unsigned long)cache_start, + (unsigned long)cache_start + + cache_length); +#endif + } + + if (dir & RKNPU_MEM_SYNC_FROM_DEVICE) { +#if KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE + __dma_unmap_area(cache_start, cache_length, + DMA_FROM_DEVICE); +#else + dcache_inval_poc((unsigned long)cache_start, + (unsigned long)cache_start + + cache_length); +#endif + } + + *length = (*offset + *length) <= cache_size ? 
+ 0 : + *length - cache_length; *offset = 0; + } else { + rknpu_cache_sync_with_sg(rknpu_dev, rknpu_obj, length, offset, + dir); } -#endif return 0; } @@ -1438,6 +1684,12 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, if (!(rknpu_obj->flags & RKNPU_MEM_CACHEABLE)) return -EINVAL; + if (rknpu_iommu_domain_get_and_switch(rknpu_dev, + rknpu_obj->iommu_domain_id)) { + LOG_DEV_ERROR(rknpu_dev->dev, "%s error\n", __func__); + return -EINVAL; + } + if (!(rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS)) { if (args->flags & RKNPU_MEM_SYNC_TO_DEVICE) { dma_sync_single_range_for_device( @@ -1460,15 +1712,14 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_obj->sram_size > 0) { rknpu_cache_sync(rknpu_obj, &length, &offset, - RKNPU_CACHE_SRAM); + RKNPU_CACHE_SRAM, args->flags); } else if (IS_ENABLED(CONFIG_NO_GKI) && rknpu_obj->nbuf_size > 0) { rknpu_cache_sync(rknpu_obj, &length, &offset, - RKNPU_CACHE_NBUF); + RKNPU_CACHE_NBUF, args->flags); } - for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, - i) { + for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, i) { if (length == 0) break; @@ -1500,5 +1751,7 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, } } + rknpu_iommu_domain_put(rknpu_dev); + return 0; } diff --git a/drivers/rknpu/rknpu_iommu.c b/drivers/rknpu/rknpu_iommu.c index 53bca78953ac4..efa97f39c8cc2 100644 --- a/drivers/rknpu/rknpu_iommu.c +++ b/drivers/rknpu/rknpu_iommu.c @@ -4,17 +4,24 @@ * Author: Felix Zeng */ +#include +#include +#include + #include "rknpu_iommu.h" +#define RKNPU_SWITCH_DOMAIN_WAIT_TIME_MS 6000 + dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, - u64 dma_limit, struct device *dev) + u64 dma_limit, struct device *dev, + bool size_aligned) { struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long shift, iova_len, iova = 0; -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - dma_addr_t limit; -#endif + unsigned long limit_pfn; + struct iova *new_iova = NULL; + bool alloc_fast = size_aligned; shift = iova_shift(iovad); iova_len = size >> shift; @@ -42,22 +49,319 @@ dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, min_t(u64, dma_limit, domain->geometry.aperture_end); #if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); + limit_pfn = dma_limit >> shift; #else - limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); - - iova = alloc_iova_fast(iovad, iova_len, limit, true); + limit_pfn = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); #endif + if (alloc_fast) { + iova = alloc_iova_fast(iovad, iova_len, limit_pfn, true); + } else { + new_iova = alloc_iova(iovad, iova_len, limit_pfn, size_aligned); + if (!new_iova) + return 0; + iova = new_iova->pfn_lo; + } + return (dma_addr_t)iova << shift; } void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size) + dma_addr_t iova, size_t size, bool size_aligned) +{ + struct iova_domain *iovad = &cookie->iovad; + bool alloc_fast = size_aligned; + + if (alloc_fast) + free_iova_fast(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad)); + else + free_iova(iovad, iova_pfn(iovad, iova)); +} + +static int rknpu_dma_info_to_prot(enum dma_data_direction dir, bool coherent) +{ + int prot = coherent ? 
IOMMU_CACHE : 0; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. + * + * At this point the segments are already laid out by iommu_dma_map_sg() to + * avoid individually crossing any boundaries, so we merely need to check a + * segment's start address to avoid concatenating across one. + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s, *cur = sg; + unsigned long seg_mask = dma_get_seg_boundary(dev); + unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); + int i, count = 0; + + for_each_sg(sg, s, nents, i) { + /* Restore this segment's original unaligned fields first */ +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + dma_addr_t s_dma_addr = sg_dma_address(s); +#endif + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + if (sg_is_dma_bus_address(s)) { + if (i > 0) + cur = sg_next(cur); + + sg_dma_unmark_bus_address(s); + sg_dma_address(cur) = s_dma_addr; + sg_dma_len(cur) = s_length; + sg_dma_mark_bus_address(cur); + count++; + cur_len = 0; + continue; + } +#endif + + s->offset += s_iova_off; + s->length = s_length; + + /* + * Now fill in the real DMA data. If... + * - there is a valid output segment to append to + * - and this segment starts on an IOVA page boundary + * - but doesn't fall at a segment boundary + * - and wouldn't make the resulting output segment too long + */ + if (cur_len && !s_iova_off && (dma_addr & seg_mask) && + (max_len - cur_len >= s_length)) { + /* ...then concatenate it with the previous one */ + cur_len += s_length; + } else { + /* Otherwise start the next output segment */ + if (i > 0) + cur = sg_next(cur); + cur_len = s_length; + count++; + + sg_dma_address(cur) = dma_addr + s_iova_off; + } + + sg_dma_len(cur) = cur_len; + dma_addr += s_iova_len; + + if (s_length + s_iova_off < s_iova_len) + cur_len = 0; + } + return count; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. 
+ */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + for_each_sg(sg, s, nents, i) { + if (sg_is_dma_bus_address(s)) { + sg_dma_unmark_bus_address(s); + } else { + if (sg_dma_address(s) != DMA_MAPPING_ERROR) + s->offset += sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + } + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + } +#else + for_each_sg(sg, s, nents, i) { + if (sg_dma_address(s) != DMA_MAPPING_ERROR) + s->offset += sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + } +#endif +} + +int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + struct scatterlist *s = NULL, *prev = NULL; + int prot = rknpu_dma_info_to_prot(dir, dev_is_dma_coherent(dev)); + dma_addr_t iova; + unsigned long iova_len = 0; + unsigned long mask = dma_get_seg_boundary(dev); + ssize_t ret = -EINVAL; + int i = 0; + + if (iova_aligned) + return dma_map_sg(dev, sg, nents, dir); + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. With some clever + * trickery we can modify the list in-place, but reversibly, by + * stashing the unaligned parts in the as-yet-unused DMA fields. + */ + for_each_sg(sg, s, nents, i) { + size_t s_iova_off = iova_offset(iovad, s->offset); + size_t s_length = s->length; + size_t pad_len = (mask - iova_len + 1) & mask; + + sg_dma_address(s) = s_iova_off; + sg_dma_len(s) = s_length; + s->offset -= s_iova_off; + s_length = iova_align(iovad, s_length + s_iova_off); + s->length = s_length; + + /* + * Due to the alignment of our single IOVA allocation, we can + * depend on these assumptions about the segment boundary mask: + * - If mask size >= IOVA size, then the IOVA range cannot + * possibly fall across a boundary, so we don't care. + * - If mask size < IOVA size, then the IOVA range must start + * exactly on a boundary, therefore we can lay things out + * based purely on segment lengths without needing to know + * the actual addresses beforehand. + * - The mask must be a power of 2, so pad_len == 0 if + * iova_len == 0, thus we cannot dereference prev the first + * time through here (i.e. before it has a meaningful value). 
+ */ + if (pad_len && pad_len < s_length - 1) { + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + if (!iova_len) { + ret = __finalise_sg(dev, sg, nents, 0); + goto out; + } + + iova = rknpu_iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), + dev, iova_aligned); + if (!iova) { + ret = -ENOMEM; + LOG_ERROR("failed to allocate IOVA: %zd\n", ret); + goto out_restore_sg; + } + + ret = iommu_map_sg(domain, iova, sg, nents, prot); + if (ret < 0 || ret < iova_len) { + LOG_ERROR("failed to map SG: %zd\n", ret); + goto out_free_iova; + } + + return __finalise_sg(dev, sg, nents, iova); + +out_free_iova: + rknpu_iommu_dma_free_iova(cookie, iova, iova_len, iova_aligned); +out_restore_sg: + __invalidate_sg(sg, nents); +out: + + if (ret < 0) + ret = 0; + + return ret; +} + +void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; + size_t iova_off = 0; + dma_addr_t end = 0, start = 0; + struct scatterlist *tmp = NULL; + dma_addr_t dma_addr = 0; + size_t size = 0; + int i = 0; + + if (iova_aligned) + return dma_unmap_sg(dev, sg, nents, dir); + +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, the start and end points + * just have to be determined. + */ + for_each_sg(sg, tmp, nents, i) { + if (sg_is_dma_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; + + start = sg_dma_address(tmp); + break; + } + + nents -= i; + for_each_sg(tmp, tmp, nents, i) { + if (sg_is_dma_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; - free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); + end = sg_dma_address(tmp) + sg_dma_len(tmp); + } +#else + start = sg_dma_address(sg); + for_each_sg(sg_next(sg), tmp, nents - 1, i) { + if (sg_dma_len(tmp) == 0) + break; + sg = tmp; + } + end = sg_dma_address(sg) + sg_dma_len(sg); +#endif + + dma_addr = start; + size = end - start; + iova_off = iova_offset(iovad, start); + + if (end) { + dma_addr -= iova_off; + size = iova_align(iovad, size + iova_off); + iommu_unmap(domain, dma_addr, size); + rknpu_iommu_dma_free_iova(cookie, dma_addr, size, iova_aligned); + } } #if defined(CONFIG_IOMMU_API) && defined(CONFIG_NO_GKI) @@ -134,11 +438,8 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) if (!bus) return -EFAULT; - mutex_lock(&rknpu_dev->domain_lock); - src_domain_id = rknpu_dev->iommu_domain_id; if (domain_id == src_domain_id) { - mutex_unlock(&rknpu_dev->domain_lock); return 0; } @@ -147,7 +448,6 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) LOG_DEV_ERROR( rknpu_dev->dev, "mismatch domain get from iommu_get_domain_for_dev\n"); - mutex_unlock(&rknpu_dev->domain_lock); return -EINVAL; } @@ -166,7 +466,6 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) "failed to reattach src iommu domain, id: %d\n", src_domain_id); } - mutex_unlock(&rknpu_dev->domain_lock); return ret; } rknpu_dev->iommu_domain_id = domain_id; @@ -177,7 +476,6 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) if (!dst_domain) { LOG_DEV_ERROR(rknpu_dev->dev, "failed 
to allocate iommu domain\n"); - mutex_unlock(&rknpu_dev->domain_lock); return -EIO; } // init domain iova_cookie @@ -191,7 +489,6 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) "failed to attach iommu domain, id: %d, ret: %d\n", domain_id, ret); iommu_domain_free(dst_domain); - mutex_unlock(&rknpu_dev->domain_lock); return ret; } @@ -208,19 +505,74 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) // reset default iommu domain rknpu_dev->iommu_group->default_domain = dst_domain; - mutex_unlock(&rknpu_dev->domain_lock); - LOG_INFO("switch iommu domain from %d to %d\n", src_domain_id, domain_id); return ret; } +int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev, + int domain_id) +{ + unsigned long timeout_jiffies = + msecs_to_jiffies(RKNPU_SWITCH_DOMAIN_WAIT_TIME_MS); + unsigned long start = jiffies; + int ret = -EINVAL; + + while (true) { + mutex_lock(&rknpu_dev->domain_lock); + + if (domain_id == rknpu_dev->iommu_domain_id) { + atomic_inc(&rknpu_dev->iommu_domain_refcount); + mutex_unlock(&rknpu_dev->domain_lock); + break; + } + + if (atomic_read(&rknpu_dev->iommu_domain_refcount) == 0) { + ret = rknpu_iommu_switch_domain(rknpu_dev, domain_id); + if (ret) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "failed to switch iommu domain, id: %d, ret: %d\n", + domain_id, ret); + mutex_unlock(&rknpu_dev->domain_lock); + return ret; + } + atomic_inc(&rknpu_dev->iommu_domain_refcount); + mutex_unlock(&rknpu_dev->domain_lock); + break; + } + + mutex_unlock(&rknpu_dev->domain_lock); + + usleep_range(10, 100); + if (time_after(jiffies, start + timeout_jiffies)) { + LOG_DEV_ERROR( + rknpu_dev->dev, + "switch iommu domain time out, failed to switch iommu domain, id: %d\n", + domain_id); + return -EINVAL; + } + } + + return 0; +} + +int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev) +{ + atomic_dec(&rknpu_dev->iommu_domain_refcount); + + return 0; +} + void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev) { int i = 0; - rknpu_iommu_switch_domain(rknpu_dev, 0); + if (rknpu_iommu_domain_get_and_switch(rknpu_dev, 0)) { + LOG_DEV_ERROR(rknpu_dev->dev, "%s error\n", __func__); + return; + } for (i = 1; i < RKNPU_MAX_IOMMU_DOMAIN_NUM; i++) { struct iommu_domain *domain = rknpu_dev->iommu_domains[i]; @@ -233,6 +585,8 @@ void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev) rknpu_dev->iommu_domains[i] = NULL; } + + rknpu_iommu_domain_put(rknpu_dev); } #else @@ -247,6 +601,17 @@ int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id) return 0; } +int rknpu_iommu_domain_get_and_switch(struct rknpu_device *rknpu_dev, + int domain_id) +{ + return 0; +} + +int rknpu_iommu_domain_put(struct rknpu_device *rknpu_dev) +{ + return 0; +} + void rknpu_iommu_free_domains(struct rknpu_device *rknpu_dev) { } diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c index 312d9c73df75a..23ed8e5cf8419 100644 --- a/drivers/rknpu/rknpu_job.c +++ b/drivers/rknpu/rknpu_job.c @@ -210,8 +210,9 @@ static inline int rknpu_job_wait(struct rknpu_job *job) (elapse_time_us < args->timeout * 1000); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); LOG_ERROR( - "job: %p, iommu domain id: %d, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", - job, job->iommu_domain_id, wait_count, + "job: %p, mask: %#x, job iommu domain id: %d, dev iommu domain id: %d, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", + job, 
args->core_mask, job->iommu_domain_id, + rknpu_dev->iommu_domain_id, wait_count, continue_wait, (job->hw_commit_time == 0 ? 0 : elapse_time_us), ktime_us_delta(ktime_get(), job->timestamp), @@ -452,9 +453,8 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) job->hw_recoder_time = job->hw_commit_time; spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); - if (atomic_dec_and_test(&job->run_count)) { + if (atomic_dec_and_test(&job->run_count)) rknpu_job_commit(job); - } } static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) @@ -485,6 +485,8 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) if (atomic_dec_and_test(&job->interrupt_count)) { int use_core_num = job->use_core_num; + rknpu_iommu_domain_put(rknpu_dev); + job->flags |= RKNPU_JOB_DONE; job->ret = ret; @@ -535,6 +537,11 @@ static void rknpu_job_schedule(struct rknpu_job *job) atomic_set(&job->interrupt_count, job->use_core_num); } + if (rknpu_iommu_domain_get_and_switch(rknpu_dev, job->iommu_domain_id)) { + job->ret = -EINVAL; + return; + } + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) { @@ -558,6 +565,8 @@ static void rknpu_job_abort(struct rknpu_job *job) unsigned long flags; int i = 0; + rknpu_iommu_domain_put(rknpu_dev); + msleep(100); spin_lock_irqsave(&rknpu_dev->irq_lock, flags); @@ -843,8 +852,6 @@ int rknpu_submit_ioctl(struct drm_device *dev, void *data, struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); struct rknpu_submit *args = data; - rknpu_iommu_switch_domain(rknpu_dev, args->iommu_domain_id); - return rknpu_submit(rknpu_dev, args); } #endif diff --git a/drivers/rknpu/rknpu_reset.c b/drivers/rknpu/rknpu_reset.c index 36657a7852feb..7bcf75b028602 100644 --- a/drivers/rknpu/rknpu_reset.c +++ b/drivers/rknpu/rknpu_reset.c @@ -148,6 +148,9 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) rknpu_dev->soft_reseting = false; + if (rknpu_dev->config->state_init != NULL) + rknpu_dev->config->state_init(rknpu_dev); + mutex_unlock(&rknpu_dev->reset_lock); #endif
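
For reference, a minimal userspace sketch of the updated memory-create path: the struct fields, flags, core masks, and ioctl number are taken from rknpu_ioctl.h as changed in this series, while the render node path, buffer size, and error handling are assumptions and not part of the patch.

/*
 * Illustrative sketch: allocate an IOMMU-backed, cacheable NPU buffer,
 * request the relaxed IOVA alignment added by this series, and bind the
 * NBUF/cache mapping to core 0 via the new core_mask field.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "rknpu_ioctl.h" /* userspace copy of the UAPI header above */

int main(void)
{
	/* Assumed DRM render node for the NPU; the actual minor may differ. */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	struct rknpu_mem_create create;

	if (fd < 0)
		return 1;

	memset(&create, 0, sizeof(create));
	create.size = 1 << 20; /* 1 MiB buffer, arbitrary example size */
	create.flags = RKNPU_MEM_IOMMU | RKNPU_MEM_NON_CONTIGUOUS |
		       RKNPU_MEM_CACHEABLE |
		       RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT;
	create.iommu_domain_id = 0;
	create.core_mask = RKNPU_CORE0_MASK; /* target NPU core 0 */

	if (ioctl(fd, DRM_IOCTL_RKNPU_MEM_CREATE, &create) < 0) {
		perror("DRM_IOCTL_RKNPU_MEM_CREATE");
		close(fd);
		return 1;
	}

	printf("handle %u, dma_addr %#llx\n", create.handle,
	       (unsigned long long)create.dma_addr);

	/* ... use the buffer, then release it via DRM_IOCTL_RKNPU_MEM_DESTROY ... */
	close(fd);
	return 0;
}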