diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index dcc3ae5fb91650..90103d9498d0bf 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -66,7 +66,8 @@ const struct raspberrypi_clk_data *clk_hw_to_data(const struct clk_hw *hw) struct raspberrypi_clk_variant { bool export; char *clkdev; - unsigned long min_rate; + u32 min_rate; + u32 max_rate; bool minimize; bool maximize; u32 flags; @@ -141,17 +142,17 @@ raspberrypi_clk_variants[RPI_FIRMWARE_NUM_CLK_ID] = { [RPI_FIRMWARE_PIXEL_CLK_ID] = { .export = true, .minimize = true, - .flags = CLK_IS_CRITICAL, + .flags = CLK_IGNORE_UNUSED, }, [RPI_FIRMWARE_HEVC_CLK_ID] = { .export = true, .minimize = true, - .flags = CLK_IS_CRITICAL, + .flags = CLK_IGNORE_UNUSED, }, [RPI_FIRMWARE_ISP_CLK_ID] = { .export = true, .minimize = true, - .flags = CLK_IS_CRITICAL, + .flags = CLK_IGNORE_UNUSED, }, [RPI_FIRMWARE_PIXEL_BVB_CLK_ID] = { .export = true, @@ -290,16 +291,22 @@ static int raspberrypi_fw_dumb_determine_rate(struct clk_hw *hw, static int raspberrypi_fw_prepare(struct clk_hw *hw) { const struct raspberrypi_clk_data *data = clk_hw_to_data(hw); + struct raspberrypi_clk_variant *variant = data->variant; struct raspberrypi_clk *rpi = data->rpi; u32 state = RPI_FIRMWARE_STATE_ENABLE_BIT; int ret; ret = raspberrypi_clock_property(rpi->firmware, data, RPI_FIRMWARE_SET_CLOCK_STATE, &state); - if (ret) + if (ret) { dev_err_ratelimited(rpi->dev, "Failed to set clock %s state to on: %d\n", clk_hw_get_name(hw), ret); + return ret; + } + + if (variant->maximize) + ret = raspberrypi_fw_set_rate(hw, variant->max_rate, 0); return ret; } @@ -307,10 +314,19 @@ static int raspberrypi_fw_prepare(struct clk_hw *hw) static void raspberrypi_fw_unprepare(struct clk_hw *hw) { const struct raspberrypi_clk_data *data = clk_hw_to_data(hw); + struct raspberrypi_clk_variant *variant = data->variant; struct raspberrypi_clk *rpi = data->rpi; u32 state = 0; int ret; + /* + * On current firmware versions, RPI_FIRMWARE_SET_CLOCK_STATE doesn't + * actually power off the clock. To achieve meaningful power consumption + * reduction, the driver needs to set the clock rate to minimum before + * disabling it. + */ + raspberrypi_fw_set_rate(hw, variant->min_rate, 0); + ret = raspberrypi_clock_property(rpi->firmware, data, RPI_FIRMWARE_SET_CLOCK_STATE, &state); if (ret) @@ -335,7 +351,7 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, { struct raspberrypi_clk_data *data; struct clk_init_data init = {}; - u32 min_rate, max_rate; + unsigned long rate; int ret; data = devm_kzalloc(rpi->dev, sizeof(*data), GFP_KERNEL); @@ -355,18 +371,20 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, data->hw.init = &init; - ret = raspberrypi_clock_property(rpi->firmware, data, - RPI_FIRMWARE_GET_MIN_CLOCK_RATE, - &min_rate); - if (ret) { - dev_err(rpi->dev, "Failed to get clock %d min freq: %d\n", - id, ret); - return ERR_PTR(ret); + if (!variant->min_rate) { + ret = raspberrypi_clock_property(rpi->firmware, data, + RPI_FIRMWARE_GET_MIN_CLOCK_RATE, + &variant->min_rate); + if (ret) { + dev_err(rpi->dev, "Failed to get clock %d min freq: %d\n", + id, ret); + return ERR_PTR(ret); + } } ret = raspberrypi_clock_property(rpi->firmware, data, RPI_FIRMWARE_GET_MAX_CLOCK_RATE, - &max_rate); + &variant->max_rate); if (ret) { dev_err(rpi->dev, "Failed to get clock %d max freq: %d\n", id, ret); @@ -377,7 +395,7 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, if (ret) return ERR_PTR(ret); - clk_hw_set_rate_range(&data->hw, min_rate, max_rate); + clk_hw_set_rate_range(&data->hw, variant->min_rate, variant->max_rate); if (variant->clkdev) { ret = devm_clk_hw_register_clkdev(rpi->dev, &data->hw, @@ -388,20 +406,11 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, } } - if (variant->maximize) - variant->min_rate = max_rate; - - if (variant->min_rate) { - unsigned long rate; - - clk_hw_set_rate_range(&data->hw, variant->min_rate, max_rate); - - rate = raspberrypi_fw_get_rate(&data->hw, 0); - if (rate < variant->min_rate) { - ret = raspberrypi_fw_set_rate(&data->hw, variant->min_rate, 0); - if (ret) - return ERR_PTR(ret); - } + rate = raspberrypi_fw_get_rate(&data->hw, 0); + if (rate < variant->min_rate) { + ret = raspberrypi_fw_set_rate(&data->hw, variant->min_rate, 0); + if (ret) + return ERR_PTR(ret); } return &data->hw; diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile index fcf710926057b3..c2d6d4b9534353 100644 --- a/drivers/gpu/drm/v3d/Makefile +++ b/drivers/gpu/drm/v3d/Makefile @@ -14,7 +14,8 @@ v3d-y := \ v3d_sched.o \ v3d_sysfs.o \ v3d_submit.o \ - v3d_gemfs.o + v3d_gemfs.o \ + v3d_power.o v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 7e789e181af0ac..d4cd4360ad21a2 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -96,7 +96,11 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct v3d_dev *v3d = to_v3d_dev(dev); - int i, core; + int i, core, ret; + + ret = v3d_pm_runtime_get(v3d); + if (ret) + return ret; for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) { const struct v3d_reg_def *def = &v3d_hub_reg_defs[i]; @@ -138,6 +142,8 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) } } + v3d_pm_runtime_put(v3d); + return 0; } @@ -147,7 +153,11 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) struct drm_device *dev = entry->dev; struct v3d_dev *v3d = to_v3d_dev(dev); u32 ident0, ident1, ident2, ident3, cores; - int core; + int core, ret; + + ret = v3d_pm_runtime_get(v3d); + if (ret) + return ret; ident0 = V3D_READ(V3D_HUB_IDENT0); ident1 = V3D_READ(V3D_HUB_IDENT1); @@ -206,6 +216,8 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) } } + v3d_pm_runtime_put(v3d); + return 0; } @@ -233,6 +245,11 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) uint32_t cycles; int core = 0; int measure_ms = 1000; + int ret; + + ret = v3d_pm_runtime_get(v3d); + if (ret) + return ret; if (v3d->ver >= V3D_GEN_41) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); @@ -252,6 +269,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) msleep(measure_ms); cycles = V3D_CORE_READ(core, V3D_PCTR_0_PCTR0); + v3d_pm_runtime_put(v3d); + seq_printf(m, "cycles: %d (%d.%d Mhz)\n", cycles, cycles / (measure_ms * 1000), diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index cf33cec1075ea4..324cba8b7d1585 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -26,8 +26,6 @@ #include #include -#include - #include #include "v3d_drv.h" @@ -65,6 +63,7 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1, [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2, }; + int ret; if (args->pad != 0) return -EINVAL; @@ -81,12 +80,19 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, if (args->value != 0) return -EINVAL; + ret = v3d_pm_runtime_get(v3d); + if (ret) + return ret; + if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 && args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) { args->value = V3D_CORE_READ(0, offset); } else { args->value = V3D_READ(offset); } + + v3d_pm_runtime_put(v3d); + return 0; } @@ -293,36 +299,6 @@ static const struct of_device_id v3d_of_match[] = { }; MODULE_DEVICE_TABLE(of, v3d_of_match); -static void -v3d_idle_sms(struct v3d_dev *v3d) -{ - if (v3d->ver < V3D_GEN_71) - return; - - V3D_SMS_WRITE(V3D_SMS_TEE_CS, V3D_SMS_CLEAR_POWER_OFF); - - if (wait_for((V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_TEE_CS), - V3D_SMS_STATE) == V3D_SMS_IDLE), 100)) { - DRM_ERROR("Failed to power up SMS\n"); - } - - v3d_reset_sms(v3d); -} - -static void -v3d_power_off_sms(struct v3d_dev *v3d) -{ - if (v3d->ver < V3D_GEN_71) - return; - - V3D_SMS_WRITE(V3D_SMS_TEE_CS, V3D_SMS_POWER_OFF); - - if (wait_for((V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_TEE_CS), - V3D_SMS_STATE) == V3D_SMS_POWER_OFF_STATE), 100)) { - DRM_ERROR("Failed to power off SMS\n"); - } -} - static int map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name) { @@ -333,8 +309,6 @@ map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name) static int v3d_platform_drm_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rpi_firmware *firmware; - struct device_node *node; struct drm_device *drm; struct v3d_dev *v3d; enum v3d_gen gen; @@ -368,23 +342,66 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) return ret; } + if (v3d->ver < V3D_GEN_41) { + ret = map_regs(v3d, &v3d->gca_regs, "gca"); + if (ret) + return ret; + } + + v3d->reset = devm_reset_control_get_optional_exclusive(dev, NULL); + if (IS_ERR(v3d->reset)) + return dev_err_probe(dev, PTR_ERR(v3d->reset), + "Failed to get reset control\n"); + + if (!v3d->reset) { + ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); + if (ret) { + dev_err(dev, "Failed to get bridge registers\n"); + return ret; + } + } + v3d->clk = devm_clk_get_optional(dev, NULL); if (IS_ERR(v3d->clk)) return dev_err_probe(dev, PTR_ERR(v3d->clk), "Failed to get V3D clock\n"); - ret = clk_prepare_enable(v3d->clk); - if (ret) { - dev_err(&pdev->dev, "Couldn't enable the V3D clock\n"); + ret = v3d_irq_init(v3d); + if (ret) return ret; + + v3d_perfmon_init(v3d); + + v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->mmu_scratch) { + dev_err(dev, "Failed to allocate MMU scratch page\n"); + return -ENOMEM; } - v3d_idle_sms(v3d); + ret = v3d_gem_init(drm); + if (ret) + goto dma_free; + + ret = devm_pm_runtime_enable(dev); + if (ret) + goto gem_destroy; + + ret = pm_runtime_resume_and_get(dev); + if (ret) + goto gem_destroy; + + /* If PM is disabled, we need to call v3d_power_resume() manually. */ + if (!IS_ENABLED(CONFIG_PM)) { + ret = v3d_power_resume(dev); + if (ret) + goto gem_destroy; + } mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); mask = DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH)); ret = dma_set_mask_and_coherent(dev, mask); if (ret) - goto clk_disable; + goto runtime_pm_put; v3d->va_width = 30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_VA_WIDTH); @@ -402,93 +419,32 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) ident3 = V3D_READ(V3D_HUB_IDENT3); v3d->rev = V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV); - v3d_perfmon_init(v3d); - - v3d->reset = devm_reset_control_get_exclusive(dev, NULL); - if (IS_ERR(v3d->reset)) { - ret = PTR_ERR(v3d->reset); - - if (ret == -EPROBE_DEFER) - goto clk_disable; + v3d_init_hw_state(v3d); - v3d->reset = NULL; - ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); - if (ret) { - dev_err(dev, - "Failed to get reset control or bridge regs\n"); - goto clk_disable; - } - } - - node = rpi_firmware_find_node(); - if (!node) { - ret = -EINVAL; - goto clk_disable; - } - - firmware = rpi_firmware_get(node); - of_node_put(node); - if (!firmware) { - ret = -EPROBE_DEFER; - goto clk_disable; - } - - v3d->clk_up_rate = rpi_firmware_clk_get_max_rate(firmware, - RPI_FIRMWARE_V3D_CLK_ID); - rpi_firmware_put(firmware); - - /* For downclocking, drop it to the minimum frequency we can get from - * the CPRMAN clock generator dividing off our parent. The divider is - * 4 bits, but ask for just higher than that so that rounding doesn't - * make cprman reject our rate. - */ - v3d->clk_down_rate = - (clk_get_rate(clk_get_parent(v3d->clk)) / (1 << 4)) + 10000; - - if (v3d->ver < V3D_GEN_41) { - ret = map_regs(v3d, &v3d->gca_regs, "gca"); - if (ret) - goto clk_disable; - } - - v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, - GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); - if (!v3d->mmu_scratch) { - dev_err(dev, "Failed to allocate MMU scratch page\n"); - ret = -ENOMEM; - goto clk_disable; - } - - ret = v3d_gem_init(drm); - if (ret) - goto dma_free; - - ret = v3d_irq_init(v3d); - if (ret) - goto gem_destroy; + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_use_autosuspend(dev); ret = drm_dev_register(drm, 0); if (ret) - goto irq_disable; + goto runtime_pm_put; ret = v3d_sysfs_init(dev); if (ret) goto drm_unregister; - ret = clk_set_min_rate(v3d->clk, v3d->clk_down_rate); - WARN_ON_ONCE(ret != 0); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); return 0; drm_unregister: drm_dev_unregister(drm); -irq_disable: - v3d_irq_disable(v3d); +runtime_pm_put: + pm_runtime_put_sync_suspend(dev); gem_destroy: v3d_gem_destroy(drm); dma_free: dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); -clk_disable: - clk_disable_unprepare(v3d->clk); return ret; } @@ -502,22 +458,27 @@ static void v3d_platform_drm_remove(struct platform_device *pdev) drm_dev_unregister(drm); - v3d_gem_destroy(drm); + pm_runtime_suspend(dev); - dma_free_wc(v3d->drm.dev, 4096, v3d->mmu_scratch, - v3d->mmu_scratch_paddr); + /* If PM is disabled, we need to call v3d_power_suspend() manually. */ + if (!IS_ENABLED(CONFIG_PM)) + v3d_power_suspend(dev); - v3d_power_off_sms(v3d); + v3d_gem_destroy(drm); - clk_disable_unprepare(v3d->clk); + dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); } +static DEFINE_RUNTIME_DEV_PM_OPS(v3d_pm_ops, v3d_power_suspend, + v3d_power_resume, NULL); + static struct platform_driver v3d_platform_driver = { .probe = v3d_platform_drm_probe, .remove = v3d_platform_drm_remove, .driver = { .name = "v3d", .of_match_table = v3d_of_match, + .pm = pm_ptr(&v3d_pm_ops), }, }; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 4ebb3dcd7ab4a8..a1da44ec656750 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -134,11 +135,6 @@ struct v3d_dev { void __iomem *gca_regs; void __iomem *sms_regs; struct clk *clk; - struct delayed_work clk_down_work; - unsigned long clk_up_rate, clk_down_rate; - struct mutex clk_lock; - u32 clk_refcount; - bool clk_up; struct reset_control *reset; @@ -335,6 +331,8 @@ struct v3d_job { /* Callback for the freeing of the job on refcount going to 0. */ void (*free)(struct kref *ref); + + bool has_pm_ref; }; struct v3d_bin_job { @@ -575,6 +573,7 @@ extern const struct dma_fence_ops v3d_fence_ops; struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q); /* v3d_gem.c */ +void v3d_init_hw_state(struct v3d_dev *v3d); int v3d_gem_init(struct drm_device *dev); void v3d_gem_destroy(struct drm_device *dev); void v3d_reset_sms(struct v3d_dev *v3d); @@ -612,6 +611,20 @@ int v3d_mmu_set_page_table(struct v3d_dev *v3d); void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); +/* v3d_power.c */ +int v3d_power_suspend(struct device *dev); +int v3d_power_resume(struct device *dev); + +static __always_inline int v3d_pm_runtime_get(struct v3d_dev *v3d) +{ + return pm_runtime_resume_and_get(v3d->drm.dev); +} + +static __always_inline int v3d_pm_runtime_put(struct v3d_dev *v3d) +{ + return pm_runtime_put_autosuspend(v3d->drm.dev); +} + /* v3d_sched.c */ void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, unsigned int count); @@ -645,4 +658,3 @@ int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); void v3d_sysfs_destroy(struct device *dev); -void v3d_submit_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 62fcd8c9cbc4be..82fda76a2daf1d 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -36,13 +36,6 @@ v3d_init_core(struct v3d_dev *v3d, int core) V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0); } -/* Sets invariant state for the HW. */ -static void -v3d_init_hw_state(struct v3d_dev *v3d) -{ - v3d_init_core(v3d, 0); -} - static void v3d_idle_axi(struct v3d_dev *v3d, int core) { @@ -259,6 +252,13 @@ v3d_invalidate_caches(struct v3d_dev *v3d) v3d_invalidate_slices(v3d, 0); } +/* Sets invariant state for the HW. */ +void +v3d_init_hw_state(struct v3d_dev *v3d) +{ + v3d_init_core(v3d, 0); +} + int v3d_gem_init(struct drm_device *dev) { @@ -291,8 +291,6 @@ v3d_gem_init(struct drm_device *dev) if (ret) return ret; - v3d_submit_init(dev); - /* Note: We don't allocate address 0. Various bits of HW * treat 0 as special, such as the occlusion query counters * where 0 means "disabled". @@ -309,9 +307,6 @@ v3d_gem_init(struct drm_device *dev) return -ENOMEM; } - v3d_init_hw_state(v3d); - v3d_mmu_set_page_table(v3d); - v3d_gemfs_init(v3d); ret = v3d_sched_init(v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2a7988adcd04b8..d9fd912e86bbaa 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -251,17 +251,10 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq, ret, core; + int irq, ret; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); - /* Clear any pending interrupts someone might have left around - * for us. - */ - for (core = 0; core < v3d->cores; core++) - V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); - V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); if (irq == -EPROBE_DEFER) return irq; @@ -299,7 +292,6 @@ v3d_irq_init(struct v3d_dev *v3d) goto fail; } - v3d_irq_enable(v3d); return 0; fail: @@ -313,6 +305,11 @@ v3d_irq_enable(struct v3d_dev *v3d) { int core; + /* Clear any pending interrupts someone might have left around for us. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); + /* Enable our set of interrupts, masking out any others. */ for (core = 0; core < v3d->cores; core++) { V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS(v3d->ver)); diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index a25d25a8ae617b..01f0b7c7b864dc 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -37,7 +37,11 @@ static bool v3d_mmu_is_aligned(u32 page, u32 page_address, size_t alignment) int v3d_mmu_flush_all(struct v3d_dev *v3d) { - int ret; + int ret = 0; + + /* Flush the PTs only if we're already awake */ + if (!pm_runtime_get_if_active(v3d->drm.dev)) + return 0; V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_FLUSH | V3D_MMUC_CONTROL_ENABLE); @@ -46,7 +50,7 @@ int v3d_mmu_flush_all(struct v3d_dev *v3d) V3D_MMUC_CONTROL_FLUSHING), 100); if (ret) { dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n"); - return ret; + goto pm_put; } V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | @@ -57,6 +61,8 @@ int v3d_mmu_flush_all(struct v3d_dev *v3d) if (ret) dev_err(v3d->drm.dev, "MMU TLB clear wait idle failed\n"); +pm_put: + v3d_pm_runtime_put(v3d); return ret; } diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 9a3fe52558746e..b0873fe20347e7 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -235,6 +235,9 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) if (WARN_ON_ONCE(!perfmon || v3d->active_perfmon)) return; + if (!pm_runtime_get_if_active(v3d->drm.dev)) + return; + ncounters = perfmon->ncounters; mask = GENMASK(ncounters - 1, 0); @@ -260,6 +263,8 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) V3D_CORE_WRITE(0, V3D_PCTR_0_OVERFLOW, mask); v3d->active_perfmon = perfmon; + + v3d_pm_runtime_put(v3d); } void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon, @@ -271,10 +276,11 @@ void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon, return; mutex_lock(&perfmon->lock); - if (perfmon != v3d->active_perfmon) { - mutex_unlock(&perfmon->lock); - return; - } + if (perfmon != v3d->active_perfmon) + goto out; + + if (!pm_runtime_get_if_active(v3d->drm.dev)) + goto out_clear; if (capture) for (i = 0; i < perfmon->ncounters; i++) @@ -282,7 +288,11 @@ void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon, V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, 0); + v3d_pm_runtime_put(v3d); + +out_clear: v3d->active_perfmon = NULL; +out: mutex_unlock(&perfmon->lock); } diff --git a/drivers/gpu/drm/v3d/v3d_power.c b/drivers/gpu/drm/v3d/v3d_power.c new file mode 100644 index 00000000000000..f3d30ef5de4ea6 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_power.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2026 Raspberry Pi */ + +#include +#include + +#include + +#include "v3d_drv.h" +#include "v3d_regs.h" + +static void +v3d_resume_sms(struct v3d_dev *v3d) +{ + if (v3d->ver < V3D_GEN_71) + return; + + V3D_SMS_WRITE(V3D_SMS_TEE_CS, V3D_SMS_CLEAR_POWER_OFF); + + if (wait_for((V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_TEE_CS), + V3D_SMS_STATE) == V3D_SMS_IDLE), 100)) { + drm_err(&v3d->drm, "Failed to power up SMS\n"); + } + + v3d_reset_sms(v3d); +} + +static void +v3d_suspend_sms(struct v3d_dev *v3d) +{ + if (v3d->ver < V3D_GEN_71) + return; + + V3D_SMS_WRITE(V3D_SMS_TEE_CS, V3D_SMS_POWER_OFF); + + if (wait_for((V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_TEE_CS), + V3D_SMS_STATE) == V3D_SMS_POWER_OFF_STATE), 100)) { + drm_err(&v3d->drm, "Failed to power off SMS\n"); + } +} + +int v3d_power_suspend(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + v3d_irq_disable(v3d); + v3d_suspend_sms(v3d); + + if (v3d->reset) + reset_control_assert(v3d->reset); + + clk_disable_unprepare(v3d->clk); + + return 0; +} + +int v3d_power_resume(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + int ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret) + return ret; + + if (v3d->reset) { + ret = reset_control_deassert(v3d->reset); + if (ret) + goto clk_disable; + } + + v3d_resume_sms(v3d); + v3d_mmu_set_page_table(v3d); + v3d_irq_enable(v3d); + + return 0; + +clk_disable: + clk_disable_unprepare(v3d->clk); + return ret; +} diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index e6ca9312566680..614cb6e3cf81f8 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -5,52 +5,11 @@ */ #include -#include #include "v3d_drv.h" #include "v3d_regs.h" #include "v3d_trace.h" -static void -v3d_clock_down_work(struct work_struct *work) -{ - struct v3d_dev *v3d = - container_of(work, struct v3d_dev, clk_down_work.work); - int ret; - - ret = clk_set_min_rate(v3d->clk, v3d->clk_down_rate); - v3d->clk_up = false; - WARN_ON_ONCE(ret != 0); -} - -static void -v3d_clock_up_get(struct v3d_dev *v3d) -{ - mutex_lock(&v3d->clk_lock); - if (v3d->clk_refcount++ == 0) { - cancel_delayed_work_sync(&v3d->clk_down_work); - if (!v3d->clk_up) { - int ret; - - ret = clk_set_min_rate(v3d->clk, v3d->clk_up_rate); - WARN_ON_ONCE(ret != 0); - v3d->clk_up = true; - } - } - mutex_unlock(&v3d->clk_lock); -} - -static void -v3d_clock_up_put(struct v3d_dev *v3d) -{ - mutex_lock(&v3d->clk_lock); - if (--v3d->clk_refcount == 0) { - schedule_delayed_work(&v3d->clk_down_work, - msecs_to_jiffies(100)); - } - mutex_unlock(&v3d->clk_lock); -} - /* Takes the reservation lock on all the BOs being referenced, so that * we can attach fences and update the reservations after pushing the job * to the queue. @@ -126,10 +85,9 @@ v3d_lookup_bos(struct drm_device *dev, } static void -v3d_job_free_common(struct v3d_job *job, - bool is_gpu_job) +v3d_job_free(struct kref *ref) { - struct v3d_dev *v3d = job->v3d; + struct v3d_job *job = container_of(ref, struct v3d_job, refcount); int i; if (job->bo) { @@ -141,29 +99,13 @@ v3d_job_free_common(struct v3d_job *job, dma_fence_put(job->irq_fence); dma_fence_put(job->done_fence); - if (is_gpu_job) - v3d_clock_up_put(v3d); - if (job->perfmon) v3d_perfmon_put(job->perfmon); - kfree(job); -} - -static void -v3d_job_free(struct kref *ref) -{ - struct v3d_job *job = container_of(ref, struct v3d_job, refcount); - - v3d_job_free_common(job, true); -} - -static void -v3d_cpu_job_free(struct kref *ref) -{ - struct v3d_job *job = container_of(ref, struct v3d_job, refcount); + if (job->has_pm_ref) + v3d_pm_runtime_put(job->v3d); - v3d_job_free_common(job, false); + kfree(job); } static void @@ -244,13 +186,13 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, if (copy_from_user(&in, handle++, sizeof(in))) { ret = -EFAULT; DRM_DEBUG("Failed to copy wait dep handle.\n"); - goto fail_deps; + goto fail_job_init; } ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0); // TODO: Investigate why this was filtered out for the IOCTL. if (ret && ret != -ENOENT) - goto fail_deps; + goto fail_job_init; } } } else { @@ -258,16 +200,22 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, // TODO: Investigate why this was filtered out for the IOCTL. if (ret && ret != -ENOENT) - goto fail_deps; + goto fail_job_init; + } + + /* CPU jobs don't require hardware resources */ + if (queue != V3D_CPU) { + ret = v3d_pm_runtime_get(v3d); + if (ret) + goto fail_job_init; + job->has_pm_ref = true; } - if (queue != V3D_CPU) - v3d_clock_up_get(v3d); kref_init(&job->refcount); return 0; -fail_deps: +fail_job_init: drm_sched_job_cleanup(&job->base); return ret; } @@ -1379,7 +1327,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); ret = v3d_job_init(v3d, file_priv, &cpu_job->base, - v3d_cpu_job_free, 0, &se, V3D_CPU); + v3d_job_free, 0, &se, V3D_CPU); if (ret) { v3d_job_deallocate((void *)&cpu_job); goto fail; @@ -1467,14 +1415,3 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, return ret; } - -void v3d_submit_init(struct drm_device *dev) { - struct v3d_dev *v3d = to_v3d_dev(dev); - - mutex_init(&v3d->clk_lock); - INIT_DELAYED_WORK(&v3d->clk_down_work, v3d_clock_down_work); - - /* kick the clock so firmware knows we are using firmware clock interface */ - v3d_clock_up_get(v3d); - v3d_clock_up_put(v3d); -} \ No newline at end of file diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c index 71c23bfd64692b..3fde817ab6ae1a 100644 --- a/drivers/pmdomain/bcm/bcm2835-power.c +++ b/drivers/pmdomain/bcm/bcm2835-power.c @@ -166,8 +166,6 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable break; } - start = ktime_get_ns(); - /* Enable the module's async AXI bridges. */ if (enable) { val = readl(base + reg) & ~ASB_REQ_STOP; @@ -176,9 +174,10 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable } writel(PM_PASSWORD | val, base + reg); + start = ktime_get_ns(); while (!!(readl(base + reg) & ASB_ACK) == enable) { cpu_relax(); - if (ktime_get_ns() - start >= 1000) + if (ktime_get_ns() - start >= 5000) return -ETIMEDOUT; }