|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# SPDX-License-Identifier: MIT |
| 3 | +import sys, pathlib |
| 4 | +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) |
| 5 | + |
| 6 | +from m1n1.setup import * |
| 7 | +from m1n1.hw.dart import DART, DARTRegs |
| 8 | +from m1n1.hw.jpeg import * |
| 9 | +from m1n1.utils import * |
| 10 | +import argparse |
| 11 | +import struct |
| 12 | +import time |
| 13 | +from enum import IntEnum |
| 14 | +from PIL import Image, ImageDraw |
| 15 | + |
| 16 | + |
def divroundup(val, div):
    """Return ``val`` divided by ``div``, rounded up.

    The dividend is biased by ``div - 1`` before the floor division, which
    yields the ceiling of the quotient for non-negative integers and a
    positive divisor.
    """
    biased = val + div - 1
    return biased // div
| 19 | + |
| 20 | + |
# Command-line interface: exactly one of -e/--encode or -d/--decode is
# required, followed by the input and output file paths.  All values are
# kept as plain strings (argparse's default when no type is given).
ap = argparse.ArgumentParser(description='JPEG block experiment')
ap.add_argument(
    "--jpeg",
    dest='which_jpeg',
    default='jpeg0',
    help='which JPEG instance (jpeg0/jpeg1)',
)
g = ap.add_mutually_exclusive_group(required=True)
g.add_argument("-e", "--encode", action='store_true')
g.add_argument("-d", "--decode", action='store_true')
# Optional path; when given, the undecoded output buffer is dumped there.
ap.add_argument("--raw-output")
ap.add_argument("input")
ap.add_argument("output")
args = ap.parse_args()
| 31 | + |
| 32 | +# print(args) |
| 33 | + |
# Perform necessary pre-parsing
#
# For decoding, the JPEG file is read and its marker segments are walked up
# to the start of scan in order to find the SOF0 frame header.  From it the
# image size and the chroma subsampling mode are derived, and from those the
# macroblock size, the output surface geometry and the buffer sizes used by
# the rest of the script.
#
# Malformed or unsupported input raises ValueError (rather than tripping an
# assert, which would be stripped under ``python -O``).
if args.decode:
    with open(args.input, 'rb') as f:
        jpeg_data = f.read()

    found_sof0 = False

    jpeg_work = jpeg_data
    while jpeg_work:
        if len(jpeg_work) < 2:
            raise ValueError("Truncated JPEG: incomplete segment marker")
        seg_marker = struct.unpack(">H", jpeg_work[:2])[0]
        print(f"Seg {seg_marker:04X}")
        if seg_marker >> 8 != 0xFF:
            # Every JPEG marker is 0xFF followed by a marker code; anything
            # else means we lost sync or this is not a JPEG file.
            raise ValueError(f"Invalid JPEG segment marker {seg_marker:04X}")
        if seg_marker == 0xFFD8:
            # SOI
            jpeg_work = jpeg_work[2:]
        elif seg_marker == 0xFFDA:
            # SOS
            break
        else:
            if len(jpeg_work) < 4:
                raise ValueError("Truncated JPEG: incomplete segment length")
            # The length field counts itself (2 bytes) but not the marker.
            seg_len = struct.unpack(">H", jpeg_work[2:4])[0]
            if seg_len < 2:
                raise ValueError(
                    f"Invalid length {seg_len} in segment {seg_marker:04X}")
            seg_data = jpeg_work[4:4 + seg_len - 2]
            if len(seg_data) != seg_len - 2:
                raise ValueError(
                    f"Truncated JPEG: segment {seg_marker:04X} is cut short")
            jpeg_work = jpeg_work[4 + seg_len - 2:]

            if seg_marker == 0xFFC0:
                # SOF0
                if found_sof0:
                    raise ValueError("JPEG has more than one SOF0 segment")
                if len(seg_data) < 6:
                    raise ValueError("Truncated SOF0 segment")
                found_sof0 = True
                # precision, height, width, number of components
                sof0 = struct.unpack(">BHHB", seg_data[:6])
                (jpeg_bpp, jpeg_H, jpeg_W, jpeg_components_cnt) = sof0
                # it is not yet verified what the requirements are for inputs
                if jpeg_bpp != 8:
                    raise ValueError(
                        f"Unsupported sample precision {jpeg_bpp}")
                if jpeg_components_cnt not in (1, 3):
                    raise ValueError(
                        f"Unsupported component count {jpeg_components_cnt}")
                if jpeg_components_cnt == 1:
                    jpeg_MODE = '400'
                else:
                    if len(seg_data) < 6 + 3 * jpeg_components_cnt:
                        raise ValueError("Truncated SOF0 component table")
                    # Three bytes per component: id, sampling factors and a
                    # third byte that is ignored here.
                    jpeg_components = {}
                    for i in range(jpeg_components_cnt):
                        comp_id, comp_sampling, _ = seg_data[6+3*i:6+3*(i+1)]
                        jpeg_components[comp_id] = comp_sampling
                    missing = [c for c in (1, 2, 3)
                               if c not in jpeg_components]
                    if missing:
                        raise ValueError(
                            f"SOF0 is missing component id(s) {missing}")
                    comp_Y = jpeg_components[1]
                    comp_Cb = jpeg_components[2]
                    comp_Cr = jpeg_components[3]

                    jpeg_MODE = {
                        (0x11, 0x11, 0x11): '444',
                        (0x21, 0x11, 0x11): '422',
                        (0x22, 0x11, 0x11): '420',
                        (0x41, 0x11, 0x11): '411',
                    }.get((comp_Y, comp_Cb, comp_Cr))
                    if jpeg_MODE is None:
                        # TODO: 422-vertical, others???
                        # Is it possible to implement them?
                        raise ValueError(
                            "Unsupported subsampling mode "
                            f"({comp_Y:02X}, {comp_Cb:02X}, {comp_Cr:02X})")

    if not found_sof0:
        raise ValueError("No SOF0 frame header found before the scan data")
    print(f"JPEG is {jpeg_W}x{jpeg_H} with subsampling {jpeg_MODE}")

    # Macroblock size in pixels (width, height) for each subsampling mode.
    macroblock_W, macroblock_H = {
        '444': (8, 8),
        '400': (8, 8),
        '422': (16, 8),
        '420': (16, 16),
        '411': (32, 8),
    }[jpeg_MODE]

    # FIXME: Exactly how much extra memory do we need to allocate?
    # The surface is padded up to a whole number of macroblocks.
    surface_W = divroundup(jpeg_W, macroblock_W) * macroblock_W
    surface_H = divroundup(jpeg_H, macroblock_H) * macroblock_H
    BYTESPP = 4
    surface_stride = surface_W * BYTESPP

    # align_up comes from the m1n1 star-imports; its default alignment is
    # not visible in this file.
    input_mem_sz = align_up(len(jpeg_data))
    print(f"Using size {input_mem_sz:08X} for JPEG data")

    output_mem_sz = align_up(surface_stride*surface_H)
    print(f"Using size {output_mem_sz:08X} for output image")
else:
    # TODO
    raise NotImplementedError("Encoding is not implemented yet")
| 124 | + |
# Turn on the JPEG block
#
# Both the JPEG instance selected with --jpeg and its DART get their clocks
# enabled, the DART is initialized, and a register map is created at the
# base address taken from the instance's first ADT "reg" entry.
dart_node = f'/arm-io/dart-{args.which_jpeg}'
jpeg_node = f'/arm-io/{args.which_jpeg}'

p.pmgr_adt_clocks_enable(dart_node)
p.pmgr_adt_clocks_enable(jpeg_node)

dart = DART.from_adt(u, dart_node)
dart.initialize()

jpeg_base, _ = u.adt[jpeg_node].get_reg(0)
jpeg = JPEGRegs(u, jpeg_base)
| 134 | + |
| 135 | + |
def reset_block():
    """Reset the JPEG block and load default register values.

    Steps through MODE values 0x100 and 0x13e, loads the defaults with
    set_default_regs(), writes MODE 0x17f and polls REG_0x1004 until it
    reads 0.  It then writes 1 to RST_INTERVAL and polls until that value
    reads back, before clearing RST_INTERVAL, ENABLE_RST_LOGGING and a group
    of REG_0x1a8..REG_0x1d4 registers and finally setting MODE to 0x143.

    Raises:
        RuntimeError: if either poll does not observe the expected value.
            (This used to be ``assert False``, which is skipped entirely
            under ``python -O``.)
    """
    jpeg.MODE.val = 0x100
    jpeg.MODE.val = 0x13e

    set_default_regs()

    jpeg.MODE.val = 0x17f
    for _ in range(10000):
        v = jpeg.REG_0x1004.val
        if v == 0:
            break
        print(f"reset 1 -- {v}")
    # Re-read once more so the verdict does not depend on how the loop ended.
    if (v := jpeg.REG_0x1004.val) != 0:
        raise RuntimeError(f"reset 1 failed! -- {v}")

    jpeg.RST_INTERVAL.val = 1
    for _ in range(2500):
        v = jpeg.RST_INTERVAL.val
        if v == 1:
            break
        print(f"reset 2 -- {v}")
    if (v := jpeg.RST_INTERVAL.val) != 1:
        raise RuntimeError(f"reset 2 failed! -- {v}")
    jpeg.RST_INTERVAL.val = 0

    jpeg.ENABLE_RST_LOGGING.val = 0
    jpeg.REG_0x1a8.val = 0
    jpeg.REG_0x1ac.val = 0
    jpeg.REG_0x1b0.val = 0
    jpeg.REG_0x1b4.val = 0
    jpeg.REG_0x1bc.val = 0
    jpeg.REG_0x1c0.val = 0
    jpeg.REG_0x1c4.val = 0
    jpeg.REG_0x1c8.val = 0
    jpeg.REG_0x1cc.val = 0
    jpeg.REG_0x1d0.val = 0
    jpeg.REG_0x1d4.val = 0

    jpeg.MODE.val = 0x143
| 177 | + |
| 178 | + |
def set_default_regs(param1=0):
    """Write the default value to every configuration register.

    The registers are written one at a time, in the order listed below.

    Args:
        param1: value written to REG_0x160 (0 by default).
    """
    def write_all(defaults):
        # Write each (register name, value) pair in list order.
        for reg_name, reg_value in defaults:
            getattr(jpeg, reg_name).val = reg_value

    write_all((
        # REG_0x0 is deliberately listed twice: the original sequence
        # writes it twice in a row.
        ("REG_0x0", 0),
        ("REG_0x0", 0),
        ("REG_0x4", 0),
        ("CODEC", 0),
        ("REG_0x2c", 0),
        ("REG_0x30", 0),
        ("REG_0x34", 1),
        ("REG_0x38", 1),
        ("CHROMA_HALVE_H_TYPE1", 0),
        ("CHROMA_HALVE_H_TYPE2", 0),
        ("CHROMA_HALVE_V_TYPE1", 0),
        ("CHROMA_HALVE_V_TYPE2", 0),
        ("CHROMA_DOUBLE_H", 0),
        ("CHROMA_QUADRUPLE_H", 0),
        ("CHROMA_DOUBLE_V", 0),
        ("REG_0x15c", 0),
        ("PX_USE_PLANE1", 0),
        ("PX_TILES_W", 1),
        ("PX_TILES_H", 1),
        ("PX_PLANE0_WIDTH", 1),
        ("PX_PLANE0_HEIGHT", 1),
        ("PX_PLANE0_TILING_H", 1),
        ("PX_PLANE0_TILING_V", 1),
        ("PX_PLANE0_STRIDE", 1),
        ("PX_PLANE1_WIDTH", 1),
        ("PX_PLANE1_HEIGHT", 1),
        ("PX_PLANE1_TILING_H", 1),
        ("PX_PLANE1_TILING_V", 1),
        ("PX_PLANE1_STRIDE", 1),
        ("INPUT_START1", 0),
        ("INPUT_START2", 0),
        ("REG_0x94", 1),
        ("REG_0x98", 1),
        ("INPUT_END", 0xffffffff),
        ("OUTPUT_START1", 0),
        ("OUTPUT_START2", 0),
        ("OUTPUT_END", 0xffffffff),
    ))

    for idx in range(11):
        jpeg.MATRIX_MULT[idx].val = 0
    for idx in range(10):
        jpeg.DITHER[idx].val = 0xff

    write_all((
        ("ENCODE_PIXEL_FORMAT", 0),
        ("ENCODE_COMPONENT0_POS", 0),
        ("ENCODE_COMPONENT1_POS", 0),
        ("ENCODE_COMPONENT2_POS", 0),
        ("ENCODE_COMPONENT3_POS", 0),
        ("CONVERT_COLOR_SPACE", 0),
        ("REG_0x118", 0),
        ("REG_0x11c", 0),
        ("REG_0x120", 0),
        ("REG_0x124", 0),
        ("REG_0x128", 0),
        ("REG_0x12c", 0),
        ("DECODE_MACROBLOCKS_W", 0),
        ("DECODE_MACROBLOCKS_H", 0),
        ("SCALE_FACTOR", 0),
        ("DECODE_PIXEL_FORMAT", 0),
        ("YUV422_ORDER", 0),
        ("RGBA_ORDER", 0),
        ("RGBA_ALPHA", 0),
        ("RIGHT_EDGE_PIXELS", 0),
        ("BOTTOM_EDGE_PIXELS", 0),
        ("RIGHT_EDGE_SAMPLES", 0),
        ("BOTTOM_EDGE_SAMPLES", 0),
    ))

    # this is always done on the m1 max hwrev
    write_all((
        ("REG_0x1fc", 0),
        ("REG_0x200", 0),
        ("REG_0x204", 0),
        ("REG_0x208", 0),
        ("REG_0x214", 0),
        ("REG_0x218", 0),
        ("REG_0x21c", 0),
        ("REG_0x220", 0),
        ("REG_0x224", 0),
        ("REG_0x228", 0),
        ("REG_0x22c", 0),
        ("REG_0x230", 0),
        ("REG_0x234", 0x1f40),
        ("REG_0x244", 0),
        ("REG_0x248", 0),
        ("REG_0x258", 0),
        ("REG_0x25c", 0),
        ("REG_0x23c", 0),
        ("REG_0x240", 0),
        ("REG_0x250", 0),
        ("REG_0x254", 0),
    ))

    write_all((
        ("REG_0x160", param1),
        ("TIMEOUT", 0),
        ("REG_0x20", 0xff),
    ))
| 271 | + |
| 272 | + |
print(f"HW revision is {jpeg.HWREV}")
reset_block()

# Allocate the input and output buffers on the heap, map both through the
# DART (stream 0) and pre-fill them with a recognisable 0xAA pattern.
BUF_ALIGN = 0x4000
FILL_BYTE = b'\xAA'

input_buf_phys = u.heap.memalign(BUF_ALIGN, input_mem_sz)
output_buf_phys = u.heap.memalign(BUF_ALIGN, output_mem_sz)
print(f"buffers (phys) {input_buf_phys:016X} {output_buf_phys:016X}")

input_buf_iova = dart.iomap(0, input_buf_phys, input_mem_sz)
output_buf_iova = dart.iomap(0, output_buf_phys, output_mem_sz)
print(f"buffers (iova) {input_buf_iova:08X} {output_buf_iova:08X}")
# dart.dump_all()

iface.writemem(input_buf_phys, FILL_BYTE * input_mem_sz)
iface.writemem(output_buf_phys, FILL_BYTE * output_mem_sz)
| 287 | + |
| 288 | + |
# Decode path: upload the JPEG, program the block for RGBA8888 output into
# the output buffer, start it, wait, then read the buffer back and save it
# as an image with PIL.
#
# All register writes go through ``.val`` (or ``.set``) like the rest of
# this file, so none of them depends on how the register map handles plain
# attribute assignment.
if args.decode:
    iface.writemem(input_buf_phys, jpeg_data)
    print("JPEG uploaded")

    jpeg.REG_0x34.val = 1
    jpeg.REG_0x2c.val = 0
    jpeg.REG_0x38.val = 0
    # NOTE(review): the codec is fixed to 4:4:4 even though the pre-parse
    # step accepts other subsampling modes -- confirm whether CODEC should
    # follow jpeg_MODE.
    jpeg.CODEC.set(CODEC=E_CODEC._444)
    jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.RGBA8888)

    jpeg.PX_USE_PLANE1.val = 0
    jpeg.PX_PLANE0_WIDTH.val = jpeg_W*BYTESPP - 1
    jpeg.PX_PLANE0_HEIGHT.val = jpeg_H - 1
    # TODO P1
    jpeg.TIMEOUT.val = 266000000

    jpeg.REG_0x94.val = 0x1f
    jpeg.REG_0x98.val = 1

    jpeg.DECODE_MACROBLOCKS_W.val = divroundup(jpeg_W, macroblock_W)
    jpeg.DECODE_MACROBLOCKS_H.val = divroundup(jpeg_H, macroblock_H)
    # right_edge_px = jpeg_W - divroundup(jpeg_W, 8)*8 + 8
    # bot_edge_px = jpeg_H - divroundup(jpeg_H, 8)*8 + 8
    # # XXX changing this does not seem to do anything
    # jpeg.RIGHT_EDGE_PIXELS.val = right_edge_px
    # jpeg.BOTTOM_EDGE_PIXELS.val = bot_edge_px
    # jpeg.RIGHT_EDGE_SAMPLES.val = right_edge_px // 2
    # jpeg.BOTTOM_EDGE_SAMPLES.val = bot_edge_px // 2

    # Each axis is divided by the macroblock size along that same axis
    # (these were previously crossed, which only works for square
    # macroblocks).
    # NOTE(review): confirm against hardware with a non-4:4:4 image.
    jpeg.PX_TILES_H.val = divroundup(jpeg_H, macroblock_H)
    jpeg.PX_TILES_W.val = divroundup(jpeg_W, macroblock_W)
    jpeg.PX_PLANE0_TILING_H.val = 4
    jpeg.PX_PLANE0_TILING_V.val = 8
    jpeg.PX_PLANE1_TILING_H.val = 1
    jpeg.PX_PLANE1_TILING_V.val = 1

    # NOTE(review): presumably YCbCr -> RGB coefficients in 8.8 fixed point
    # (0x167 / 256 ~= 1.402) -- confirm.
    jpeg.MATRIX_MULT[0].val = 0x100
    jpeg.MATRIX_MULT[1].val = 0x0
    jpeg.MATRIX_MULT[2].val = 0x167
    jpeg.MATRIX_MULT[3].val = 0x100
    jpeg.MATRIX_MULT[4].val = 0xffffffa8
    jpeg.MATRIX_MULT[5].val = 0xffffff49
    jpeg.MATRIX_MULT[6].val = 0x100
    jpeg.MATRIX_MULT[7].val = 0x1c6
    jpeg.MATRIX_MULT[8].val = 0x0
    jpeg.MATRIX_MULT[9].val = 0x0
    jpeg.MATRIX_MULT[10].val = 0xffffff80

    jpeg.RGBA_ALPHA.val = 0xff
    jpeg.RGBA_ORDER.val = 1

    jpeg.SCALE_FACTOR.val = 0

    jpeg.INPUT_START1.val = input_buf_iova
    jpeg.INPUT_START2.val = 0xdeadbeef
    jpeg.INPUT_END.val = input_buf_iova + input_mem_sz
    jpeg.OUTPUT_START1.val = output_buf_iova
    # jpeg.OUTPUT_START2.val = output_buf_iova + jpeg_W * 4 # HACK
    jpeg.OUTPUT_START2.val = 0xdeadbeef
    jpeg.OUTPUT_END.val = output_buf_iova + output_mem_sz
    jpeg.PX_PLANE0_STRIDE.val = surface_stride
    # jpeg.PX_PLANE1_STRIDE.val = output_W * 4 # HACK

    jpeg.REG_0x1ac.val = 0x0
    jpeg.REG_0x1b0.val = 0x0
    jpeg.REG_0x1b4.val = 0x0
    jpeg.REG_0x1bc.val = 0x0
    jpeg.REG_0x1c0.val = 0x0
    jpeg.REG_0x1c4.val = 0x0

    jpeg.REG_0x118.val = 0x0
    jpeg.REG_0x11c.val = 0x1

    jpeg.MODE.val = 0x177
    jpeg.REG_0x1028.val = 0x400

    jpeg.JPEG_IO_FLAGS.val = 0x3f
    jpeg.REG_0x0.val = 0x1
    jpeg.REG_0x1004.val = 0x1

    # FIXME: we don't actually know when it's done
    time.sleep(1)

    print(jpeg.STATUS.reg)
    print(jpeg.PERFCOUNTER.reg)

    output_data = iface.readmem(output_buf_phys, output_mem_sz)
    if args.raw_output is not None:
        with open(args.raw_output, 'wb') as f:
            f.write(output_data)

    # Rows in the buffer are surface_stride bytes apart, which can be wider
    # than the image; copy only the visible jpeg_W x jpeg_H pixels.
    with Image.new(mode='RGBA', size=(jpeg_W, jpeg_H)) as im:
        for y in range(jpeg_H):
            row_off = y * surface_stride
            for x in range(jpeg_W):
                px_off = row_off + x * BYTESPP
                im.putpixel(
                    (x, y), tuple(output_data[px_off:px_off + BYTESPP]))
        im.save(args.output)
0 commit comments