Skip to content

Commit 0c1f89d

Browse files
ArcaneNibble authored and marcan committed
jpeg: Import working basic decode experiment
Signed-off-by: R <[email protected]>
1 parent 49cdf7f commit 0c1f89d

2 files changed

Lines changed: 390 additions & 1 deletion

File tree

proxyclient/experiments/jpeg.py

Lines changed: 389 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,389 @@
1+
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: MIT
3+
import sys, pathlib
4+
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
5+
6+
from m1n1.setup import *
7+
from m1n1.hw.dart import DART, DARTRegs
8+
from m1n1.hw.jpeg import *
9+
from m1n1.utils import *
10+
import argparse
11+
import struct
12+
import time
13+
from enum import IntEnum
14+
from PIL import Image, ImageDraw
15+
16+
17+
def divroundup(val, div):
    """Floor-divide ``val + div - 1`` by ``div``.

    For non-negative ``val`` and positive ``div`` this is ``val / div``
    rounded up to the next whole number.
    """
    biased = val + div - 1
    return biased // div
19+
20+
21+
# Command line: which JPEG instance to drive, exactly one of -e/-d, an
# optional path for a raw dump of the output buffer, then the input and
# output file names.
ap = argparse.ArgumentParser(description="JPEG block experiment")
ap.add_argument(
    "--jpeg",
    default="jpeg0",
    dest="which_jpeg",
    help="which JPEG instance (jpeg0/jpeg1)",
    type=str,
)

# The direction flags are mutually exclusive and one of them is mandatory.
g = ap.add_mutually_exclusive_group(required=True)
g.add_argument("-e", "--encode", action="store_true")
g.add_argument("-d", "--decode", action="store_true")

ap.add_argument("--raw-output", required=False, type=str)
ap.add_argument("input", type=str)
ap.add_argument("output", type=str)

args = ap.parse_args()
31+
32+
# print(args)
33+
34+
# Perform necessary pre-parsing
35+
# SOF0 luma sampling byte (horizontal factor in the high nibble, vertical
# factor in the low nibble) mapped to the subsampling mode name. Both chroma
# components must use 0x11 for any of these modes.
_LUMA_SAMPLING_TO_MODE = {
    0x11: '444',
    0x21: '422',
    0x22: '420',
    0x41: '411',
}

# (width, height) in pixels of one macroblock for each subsampling mode.
_MACROBLOCK_SIZE = {
    '444': (8, 8),
    '400': (8, 8),
    '422': (16, 8),
    '420': (16, 16),
    '411': (32, 8),
}


def _decode_sof0(seg_data):
    """Decode the payload of a SOF0 segment.

    seg_data is the segment body without the marker and length fields.
    Returns ``(width, height, mode)`` where mode is one of '400', '444',
    '422', '420' or '411'. Raises ValueError for anything this experiment
    cannot handle.
    """
    if len(seg_data) < 6:
        raise ValueError("truncated SOF0 segment")
    bpp, height, width, components_cnt = struct.unpack_from(">BHHB", seg_data)

    # it is not yet verified what the requirements are for inputs
    if bpp != 8:
        raise ValueError(f"unsupported sample precision {bpp} (only 8 is supported)")
    if width == 0 or height == 0:
        raise ValueError(f"unsupported image size {width}x{height}")
    if components_cnt == 1:
        return width, height, '400'
    if components_cnt != 3:
        raise ValueError(f"unsupported component count {components_cnt}")
    if len(seg_data) < 6 + 3 * components_cnt:
        raise ValueError("truncated SOF0 component table")

    # Each component entry is (id, sampling factors, quantization table).
    sampling = {}
    for i in range(components_cnt):
        comp_id, comp_sampling, _ = seg_data[6 + 3 * i:6 + 3 * (i + 1)]
        sampling[comp_id] = comp_sampling
    for comp_id in (1, 2, 3):
        if comp_id not in sampling:
            raise ValueError(f"SOF0 is missing component id {comp_id}")
    comp_Y, comp_Cb, comp_Cr = sampling[1], sampling[2], sampling[3]

    if (comp_Cb, comp_Cr) != (0x11, 0x11) or comp_Y not in _LUMA_SAMPLING_TO_MODE:
        # TODO: 422-vertical, others???
        # Is it possible to implement them?
        raise ValueError("Unsupported subsampling mode")
    return width, height, _LUMA_SAMPLING_TO_MODE[comp_Y]


def _find_sof0(data):
    """Walk the marker segments of a JPEG stream up to SOS and decode SOF0.

    Prints every marker encountered, as a debugging aid. Returns the
    ``(width, height, mode)`` tuple produced by _decode_sof0. Raises
    ValueError if the stream is truncated, malformed, contains more than one
    SOF0, or contains none before the scan data starts.
    """
    frame = None
    pos = 0
    end = len(data)
    while pos < end:
        if end - pos < 2:
            raise ValueError("truncated JPEG: incomplete marker")
        (seg_marker,) = struct.unpack_from(">H", data, pos)
        if seg_marker == 0xFFFF:
            # 0xFF fill byte in front of the real marker; skip one byte.
            pos += 1
            continue
        print(f"Seg {seg_marker:04X}")
        if seg_marker >> 8 != 0xFF:
            raise ValueError(f"expected a JPEG marker at offset {pos}, got {seg_marker:04X}")

        if seg_marker in (0xFFDA, 0xFFD9):
            # SOS: entropy-coded data follows, nothing more to parse.
            # EOI: end of stream.
            break
        if seg_marker in (0xFFD8, 0xFF01) or 0xFFD0 <= seg_marker <= 0xFFD7:
            # SOI, TEM and RSTn are standalone markers with no length field.
            pos += 2
            continue

        if end - pos < 4:
            raise ValueError("truncated JPEG: incomplete segment length")
        (seg_len,) = struct.unpack_from(">H", data, pos + 2)
        if seg_len < 2:
            raise ValueError(f"invalid segment length {seg_len}")
        # The length field counts itself but not the two marker bytes.
        seg_end = pos + 2 + seg_len
        if seg_end > end:
            raise ValueError("truncated JPEG: segment runs past end of data")
        seg_data = data[pos + 4:seg_end]
        pos = seg_end

        if seg_marker == 0xFFC0:
            # SOF0
            if frame is not None:
                raise ValueError("more than one SOF0 segment")
            frame = _decode_sof0(seg_data)

    if frame is None:
        raise ValueError("no SOF0 (baseline) frame header found")
    return frame


if args.decode:
    with open(args.input, 'rb') as f:
        jpeg_data = f.read()

    jpeg_W, jpeg_H, jpeg_MODE = _find_sof0(jpeg_data)
    print(f"JPEG is {jpeg_W}x{jpeg_H} with subsampling {jpeg_MODE}")

    macroblock_W, macroblock_H = _MACROBLOCK_SIZE[jpeg_MODE]

    # The output surface is the image rounded up to whole macroblocks.
    # FIXME: Exactly how much extra memory do we need to allocate?
    surface_W = divroundup(jpeg_W, macroblock_W) * macroblock_W
    surface_H = divroundup(jpeg_H, macroblock_H) * macroblock_H
    BYTESPP = 4
    surface_stride = surface_W * BYTESPP

    input_mem_sz = align_up(len(jpeg_data))
    print(f"Using size {input_mem_sz:08X} for JPEG data")

    output_mem_sz = align_up(surface_stride*surface_H)
    print(f"Using size {output_mem_sz:08X} for output image")
else:
    # TODO
    # Raise instead of assert so the script still stops here under -O.
    raise NotImplementedError("encoding is not implemented yet")
124+
125+
# Turn on the JPEG block
# ADT node paths for the selected JPEG instance and its DART.
_dart_node = f'/arm-io/dart-{args.which_jpeg}'
_jpeg_node = f'/arm-io/{args.which_jpeg}'

# Enable clocks for the DART first, then for the JPEG block itself.
p.pmgr_adt_clocks_enable(_dart_node)
p.pmgr_adt_clocks_enable(_jpeg_node)

dart = DART.from_adt(u, _dart_node)
dart.initialize()

# First "reg" entry of the JPEG node; the second element of the pair
# returned by get_reg() is not needed here.
jpeg_base, _ = u.adt[_jpeg_node].get_reg(0)
jpeg = JPEGRegs(u, jpeg_base)
134+
135+
136+
def reset_block():
    """Run the JPEG block through its reset sequence.

    Steps through several MODE values, reloads the default register values
    via set_default_regs(), polls REG_0x1004 until it reads 0 and
    RST_INTERVAL until it reads back 1, clears a run of registers and
    finally leaves MODE at 0x143. Raises AssertionError if either poll
    never sees the expected value.
    """
    jpeg.MODE.val = 0x100
    jpeg.MODE.val = 0x13e

    set_default_regs()

    jpeg.MODE.val = 0x17f

    # First wait: REG_0x1004 has to read 0. Every non-zero poll is logged.
    for _ in range(10000):
        status = jpeg.REG_0x1004.val
        if status == 0:
            break
        print(f"reset 1 -- {status}")
    # Read once more after the loop so running out of polls is detected.
    status = jpeg.REG_0x1004.val
    if status != 0:
        print(f"reset 1 failed! -- {status}")
        assert False

    # Second wait: the value written to RST_INTERVAL has to read back.
    jpeg.RST_INTERVAL.val = 1
    for _ in range(2500):
        readback = jpeg.RST_INTERVAL.val
        if readback == 1:
            break
        print(f"reset 2 -- {readback}")
    readback = jpeg.RST_INTERVAL.val
    if readback != 1:
        print(f"reset 2 failed! -- {readback}")
        assert False
    jpeg.RST_INTERVAL.val = 0

    # Registers cleared to zero, in this order.
    for reg_name in (
        "ENABLE_RST_LOGGING",
        "REG_0x1a8",
        "REG_0x1ac",
        "REG_0x1b0",
        "REG_0x1b4",
        "REG_0x1bc",
        "REG_0x1c0",
        "REG_0x1c4",
        "REG_0x1c8",
        "REG_0x1cc",
        "REG_0x1d0",
        "REG_0x1d4",
    ):
        getattr(jpeg, reg_name).val = 0

    jpeg.MODE.val = 0x143
177+
178+
179+
def set_default_regs(param1=0):
    """Write the default value to every JPEG register this script uses.

    The writes are issued in a fixed order. param1 is the value written to
    REG_0x160; everything else is a constant.
    """
    def write_all(pairs):
        # Issue one register write per (name, value) pair, in order.
        for reg_name, value in pairs:
            getattr(jpeg, reg_name).val = value

    write_all((
        # NOTE(review): REG_0x0 is written twice in a row; kept as found.
        ("REG_0x0", 0),
        ("REG_0x0", 0),
        ("REG_0x4", 0),
        ("CODEC", 0),
        ("REG_0x2c", 0),
        ("REG_0x30", 0),
        ("REG_0x34", 1),
        ("REG_0x38", 1),
        ("CHROMA_HALVE_H_TYPE1", 0),
        ("CHROMA_HALVE_H_TYPE2", 0),
        ("CHROMA_HALVE_V_TYPE1", 0),
        ("CHROMA_HALVE_V_TYPE2", 0),
        ("CHROMA_DOUBLE_H", 0),
        ("CHROMA_QUADRUPLE_H", 0),
        ("CHROMA_DOUBLE_V", 0),
        ("REG_0x15c", 0),
        ("PX_USE_PLANE1", 0),
        ("PX_TILES_W", 1),
        ("PX_TILES_H", 1),
        ("PX_PLANE0_WIDTH", 1),
        ("PX_PLANE0_HEIGHT", 1),
        ("PX_PLANE0_TILING_H", 1),
        ("PX_PLANE0_TILING_V", 1),
        ("PX_PLANE0_STRIDE", 1),
        ("PX_PLANE1_WIDTH", 1),
        ("PX_PLANE1_HEIGHT", 1),
        ("PX_PLANE1_TILING_H", 1),
        ("PX_PLANE1_TILING_V", 1),
        ("PX_PLANE1_STRIDE", 1),
        ("INPUT_START1", 0),
        ("INPUT_START2", 0),
        ("REG_0x94", 1),
        ("REG_0x98", 1),
        ("INPUT_END", 0xffffffff),
        ("OUTPUT_START1", 0),
        ("OUTPUT_START2", 0),
        ("OUTPUT_END", 0xffffffff),
    ))

    # Indexed register arrays: 11 matrix entries, then 10 dither entries.
    for idx in range(11):
        jpeg.MATRIX_MULT[idx].val = 0
    for idx in range(10):
        jpeg.DITHER[idx].val = 0xff

    write_all((
        ("ENCODE_PIXEL_FORMAT", 0),
        ("ENCODE_COMPONENT0_POS", 0),
        ("ENCODE_COMPONENT1_POS", 0),
        ("ENCODE_COMPONENT2_POS", 0),
        ("ENCODE_COMPONENT3_POS", 0),
        ("CONVERT_COLOR_SPACE", 0),
        ("REG_0x118", 0),
        ("REG_0x11c", 0),
        ("REG_0x120", 0),
        ("REG_0x124", 0),
        ("REG_0x128", 0),
        ("REG_0x12c", 0),
        ("DECODE_MACROBLOCKS_W", 0),
        ("DECODE_MACROBLOCKS_H", 0),
        ("SCALE_FACTOR", 0),
        ("DECODE_PIXEL_FORMAT", 0),
        ("YUV422_ORDER", 0),
        ("RGBA_ORDER", 0),
        ("RGBA_ALPHA", 0),
        ("RIGHT_EDGE_PIXELS", 0),
        ("BOTTOM_EDGE_PIXELS", 0),
        ("RIGHT_EDGE_SAMPLES", 0),
        ("BOTTOM_EDGE_SAMPLES", 0),
    ))

    # this is always done on the m1 max hwrev
    write_all((
        ("REG_0x1fc", 0),
        ("REG_0x200", 0),
        ("REG_0x204", 0),
        ("REG_0x208", 0),
        ("REG_0x214", 0),
        ("REG_0x218", 0),
        ("REG_0x21c", 0),
        ("REG_0x220", 0),
        ("REG_0x224", 0),
        ("REG_0x228", 0),
        ("REG_0x22c", 0),
        ("REG_0x230", 0),
        ("REG_0x234", 0x1f40),
        ("REG_0x244", 0),
        ("REG_0x248", 0),
        ("REG_0x258", 0),
        ("REG_0x25c", 0),
        ("REG_0x23c", 0),
        ("REG_0x240", 0),
        ("REG_0x250", 0),
        ("REG_0x254", 0),
    ))

    write_all((
        ("REG_0x160", param1),
        ("TIMEOUT", 0),
        ("REG_0x20", 0xff),
    ))
271+
272+
273+
print(f"HW revision is {jpeg.HWREV}")
reset_block()

# Allocate both buffers from the heap with 0x4000 alignment.
input_buf_phys = u.heap.memalign(0x4000, input_mem_sz)
output_buf_phys = u.heap.memalign(0x4000, output_mem_sz)
print(f"buffers (phys) {input_buf_phys:016X} {output_buf_phys:016X}")

# Map the buffers through the DART (first argument 0) so the JPEG block
# can be given I/O virtual addresses for them.
input_buf_iova = dart.iomap(0, input_buf_phys, input_mem_sz)
output_buf_iova = dart.iomap(0, output_buf_phys, output_mem_sz)
print(f"buffers (iova) {input_buf_iova:08X} {output_buf_iova:08X}")
# dart.dump_all()

# Pre-fill both buffers with 0xAA so untouched bytes are recognisable.
iface.writemem(input_buf_phys, bytes([0xAA]) * input_mem_sz)
iface.writemem(output_buf_phys, bytes([0xAA]) * output_mem_sz)
287+
288+
289+
if args.decode:
    # Overwrite the start of the pre-filled input buffer with the JPEG file.
    iface.writemem(input_buf_phys, jpeg_data)
    print("JPEG uploaded")

    # All register writes below go through .val, matching the rest of the
    # script (the original mixed bare attribute assignment and .val).
    jpeg.REG_0x34.val = 1
    jpeg.REG_0x2c.val = 0
    jpeg.REG_0x38.val = 0
    # NOTE(review): the codec is fixed to 444 regardless of the subsampling
    # mode detected while parsing the file - confirm whether it should
    # follow jpeg_MODE.
    jpeg.CODEC.set(CODEC=E_CODEC._444)
    jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.RGBA8888)

    jpeg.PX_USE_PLANE1.val = 0
    jpeg.PX_PLANE0_WIDTH.val = jpeg_W*BYTESPP - 1
    jpeg.PX_PLANE0_HEIGHT.val = jpeg_H - 1
    # TODO P1
    jpeg.TIMEOUT.val = 266000000

    jpeg.REG_0x94.val = 0x1f
    jpeg.REG_0x98.val = 1

    jpeg.DECODE_MACROBLOCKS_W.val = divroundup(jpeg_W, macroblock_W)
    jpeg.DECODE_MACROBLOCKS_H.val = divroundup(jpeg_H, macroblock_H)
    # right_edge_px = jpeg_W - divroundup(jpeg_W, 8)*8 + 8
    # bot_edge_px = jpeg_H - divroundup(jpeg_H, 8)*8 + 8
    # # XXX changing this does not seem to do anything
    # jpeg.RIGHT_EDGE_PIXELS.val = right_edge_px
    # jpeg.BOTTOM_EDGE_PIXELS.val = bot_edge_px
    # jpeg.RIGHT_EDGE_SAMPLES.val = right_edge_px // 2
    # jpeg.BOTTOM_EDGE_SAMPLES.val = bot_edge_px // 2

    # Divide each image dimension by the macroblock size along the same
    # axis. The original divided the height by macroblock_W and the width
    # by macroblock_H, which only gives the same result for square
    # macroblocks.
    # NOTE(review): confirm against hardware with a 422 or 411 image.
    jpeg.PX_TILES_H.val = divroundup(jpeg_H, macroblock_H)
    jpeg.PX_TILES_W.val = divroundup(jpeg_W, macroblock_W)
    jpeg.PX_PLANE0_TILING_H.val = 4
    jpeg.PX_PLANE0_TILING_V.val = 8
    jpeg.PX_PLANE1_TILING_H.val = 1
    jpeg.PX_PLANE1_TILING_V.val = 1

    # Colour conversion matrix; the 0xffffffxx entries are negative values
    # written as 32-bit two's complement.
    jpeg.MATRIX_MULT[0].val = 0x100
    jpeg.MATRIX_MULT[1].val = 0x0
    jpeg.MATRIX_MULT[2].val = 0x167
    jpeg.MATRIX_MULT[3].val = 0x100
    jpeg.MATRIX_MULT[4].val = 0xffffffa8
    jpeg.MATRIX_MULT[5].val = 0xffffff49
    jpeg.MATRIX_MULT[6].val = 0x100
    jpeg.MATRIX_MULT[7].val = 0x1c6
    jpeg.MATRIX_MULT[8].val = 0x0
    jpeg.MATRIX_MULT[9].val = 0x0
    jpeg.MATRIX_MULT[10].val = 0xffffff80

    jpeg.RGBA_ALPHA.val = 0xff
    jpeg.RGBA_ORDER.val = 1

    jpeg.SCALE_FACTOR.val = 0

    jpeg.INPUT_START1.val = input_buf_iova
    jpeg.INPUT_START2.val = 0xdeadbeef
    jpeg.INPUT_END.val = input_buf_iova + input_mem_sz
    jpeg.OUTPUT_START1.val = output_buf_iova
    # jpeg.OUTPUT_START2.val = output_buf_iova + jpeg_W * 4   # HACK
    jpeg.OUTPUT_START2.val = 0xdeadbeef
    jpeg.OUTPUT_END.val = output_buf_iova + output_mem_sz
    jpeg.PX_PLANE0_STRIDE.val = surface_stride
    # jpeg.PX_PLANE1_STRIDE.val = output_W * 4    # HACK

    jpeg.REG_0x1ac.val = 0x0
    jpeg.REG_0x1b0.val = 0x0
    jpeg.REG_0x1b4.val = 0x0
    jpeg.REG_0x1bc.val = 0x0
    jpeg.REG_0x1c0.val = 0x0
    jpeg.REG_0x1c4.val = 0x0

    jpeg.REG_0x118.val = 0x0
    jpeg.REG_0x11c.val = 0x1

    jpeg.MODE.val = 0x177
    jpeg.REG_0x1028.val = 0x400

    jpeg.JPEG_IO_FLAGS.val = 0x3f
    jpeg.REG_0x0.val = 0x1
    jpeg.REG_0x1004.val = 0x1

    # FIXME: we don't actually know when it's done
    time.sleep(1)

    print(jpeg.STATUS.reg)
    print(jpeg.PERFCOUNTER.reg)

    output_data = iface.readmem(output_buf_phys, output_mem_sz)
    if args.raw_output is not None:
        with open(args.raw_output, 'wb') as f:
            f.write(output_data)

    # Rows in the output buffer are surface_stride bytes apart, which can be
    # more than jpeg_W*BYTESPP. Pillow's raw decoder takes that stride
    # directly, so the image is built in one call instead of one
    # putpixel() per pixel.
    visible_rows = bytes(output_data[:surface_stride * jpeg_H])
    with Image.frombytes('RGBA', (jpeg_W, jpeg_H), visible_rows,
                         'raw', 'RGBA', surface_stride, 1) as im:
        im.save(args.output)

proxyclient/m1n1/hw/jpeg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class E_DECODE_PIXEL_FORMAT(IntEnum):
5555
YUV420_planar = 2
5656
YUV422_linear = 3
5757
_YUV10_broken_doesnt_work = 4
58-
RGBA888 = 5
58+
RGBA8888 = 5
5959
RGB565 = 6
6060
_RGB101010_broken_doesnt_work = 7
6161

0 commit comments

Comments
 (0)