Skip to content

Commit 446c6a2

Browse files
arndb authored and mchehab committed
media: rkvdec: reduce excessive stack usage in assemble_hw_pps()
The rkvdec_pps had a large set of bitfields, all of which are misaligned. This causes clang-21 and likely other versions to produce absolutely awful object code and a warning about very large stack usage, on targets without unaligned access: drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c:966:12: error: stack frame size (1472) exceeds limit (1280) in 'rkvdec_vp9_start' [-Werror,-Wframe-larger-than] Part of the problem here is how all the bitfield accesses are inlined into a function that already has large structures on the stack. Mark set_field_order_cnt() as noinline_for_stack, and split out the following accesses in assemble_hw_pps() into another noinline function, both of which now use around 800 bytes of stack in the same configuration. There is clearly still something wrong with clang here, but splitting it into multiple functions reduces the risk of stack overflow. Fixes: fde2490 ("media: rkvdec: Add H264 support for the VDPU383 variant") Link: https://godbolt.org/z/acP1eKeq9 Signed-off-by: Arnd Bergmann <[email protected]> Reviewed-by: Nicolas Dufresne <[email protected]> Signed-off-by: Nicolas Dufresne <[email protected]> Signed-off-by: Mauro Carvalho Chehab <[email protected]>
1 parent daa87ca commit 446c6a2

1 file changed

Lines changed: 27 additions & 23 deletions

File tree

drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ struct rkvdec_h264_ctx {
130130
struct vdpu383_regs_h26x regs;
131131
};
132132

133-
static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
133+
static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
134134
{
135135
pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt;
136136
pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt;
@@ -166,6 +166,31 @@ static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_d
166166
pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt;
167167
}
168168

169+
static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params)
170+
{
171+
const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb;
172+
173+
for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
174+
if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
175+
pps->is_longterm |= (1 << i);
176+
pps->ref_field_flags |=
177+
(!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
178+
pps->ref_colmv_use_flag |=
179+
(!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
180+
pps->ref_topfield_used |=
181+
(!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
182+
pps->ref_botfield_used |=
183+
(!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
184+
}
185+
pps->pic_field_flag =
186+
!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
187+
pps->pic_associated_flag =
188+
!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
189+
190+
pps->cur_top_field = dec_params->top_field_order_cnt;
191+
pps->cur_bot_field = dec_params->bottom_field_order_cnt;
192+
}
193+
169194
static void assemble_hw_pps(struct rkvdec_ctx *ctx,
170195
struct rkvdec_h264_run *run)
171196
{
@@ -177,7 +202,6 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
177202
struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu;
178203
struct rkvdec_sps_pps *hw_ps;
179204
u32 pic_width, pic_height;
180-
u32 i;
181205

182206
/*
183207
* HW read the SPS/PPS information from PPS packet index by PPS id.
@@ -261,28 +285,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
261285
!!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT);
262286

263287
set_field_order_cnt(&hw_ps->pps, dpb);
288+
set_dec_params(&hw_ps->pps, dec_params);
264289

265-
for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
266-
if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
267-
hw_ps->pps.is_longterm |= (1 << i);
268-
269-
hw_ps->pps.ref_field_flags |=
270-
(!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
271-
hw_ps->pps.ref_colmv_use_flag |=
272-
(!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
273-
hw_ps->pps.ref_topfield_used |=
274-
(!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
275-
hw_ps->pps.ref_botfield_used |=
276-
(!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
277-
}
278-
279-
hw_ps->pps.pic_field_flag =
280-
!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
281-
hw_ps->pps.pic_associated_flag =
282-
!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
283-
284-
hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt;
285-
hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt;
286290
}
287291

288292
static void rkvdec_write_regs(struct rkvdec_ctx *ctx)

0 commit comments

Comments
 (0)