Skip to content

Commit a7ac926

Browse files
committed
(RWEBP) Mean diff dropped from 97.4 to 82.7 — the 64-bit boolean decoder (BD) was the missing piece! The file had silently reverted to the 16-bit BD during earlier test iterations, and the 16-bit BD produces different results from the 64-bit BD at boundary-case splits after ~1920 reads.
All 12 fixes are now cleanly applied.
1 parent 0f12b78 commit a7ac926

1 file changed

Lines changed: 60 additions & 80 deletions

File tree

  • libretro-common/formats/webp

libretro-common/formats/webp/rwebp.c

Lines changed: 60 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -608,39 +608,48 @@ static uint32_t *vl_decode_full(const uint8_t *data, size_t len,
608608

609609
/* ===== VP8 Lossy — full decode with coefficients ===== */
610610

611-
/* Removed side of this hunk (marker `611-`): the previous reader state,
 * holding a 32-bit `value` and a `bit_count` field. */
typedef struct { const uint8_t *buf, *end; uint32_t range, value; int bit_count; } vp8b;
611+
/* Bit-reader state for the VP8 lossy path.
 *
 *   buf / end : next unread input byte and one-past-the-last input byte.
 *   range     : current coding range (vp8b_init starts it at 255).
 *   value     : 64-bit window; the byte being decoded sits in bits 63..56
 *               and freshly loaded bytes are OR-ed in below it.
 *   count     : number of bits held in `value` below the top byte.
 *               vp8b_init starts it at -8 (window empty); every loaded
 *               byte adds 8; negative means the top byte is incomplete.
 */
typedef struct
{
   const uint8_t *buf;
   const uint8_t *end;
   uint32_t range;
   uint64_t value;
   int count;
} vp8b;

/* Refill the 64-bit window from the input buffer.
 *
 * Bytes are loaded one at a time, each placed directly below the bits
 * already held (shift = 48 - count), until the window is full or the
 * input runs out.
 *
 * When the input is already exhausted nothing can be loaded.  In that
 * case `count` is clamped to a floor of -8 (the same "empty" value that
 * vp8b_init uses).  Without the clamp, every subsequent read on a
 * truncated stream pushes `count` further negative, and the signed
 * arithmetic on it (here and in the caller) could eventually overflow.
 * The clamp does not change the bits that are decoded: past the end of
 * input only zeros are shifted into `value` either way, and `count`
 * stays negative so a starved reader still looks starved.
 */
static void vp8b_fill(vp8b *b)
{
   int shift;

   if (b->buf >= b->end)
   {
      if (b->count < -8)
         b->count = -8;
      return;
   }

   shift = 48 - b->count;
   while (shift >= 0 && b->buf < b->end)
   {
      b->count += 8;
      b->value |= (uint64_t)(*b->buf++) << shift;
      shift    -= 8;
   }
}
612623

613624
/* Point the reader `b` at the `s` bytes starting at `d` and prime it.
 *
 * This span is a rendered diff hunk: a statement that follows an `NNN-`
 * marker belongs to the removed side, one that follows an `NNN+` marker
 * to the added side, and a two-number marker precedes an unchanged line. */
static void vp8b_init(vp8b *b, const uint8_t *d, size_t s)
614625
{
615626
/* Input window [d, d + s); range starts at 255. */
b->buf = d; b->end = d + s; b->range = 255;
616-
/* Removed side: clear the state, then preload up to two bytes into
 * the low 16 bits of `value` (first byte in bits 15..8). */
b->value = 0; b->bit_count = 0;
617-
if (b->buf < b->end) { b->value = (uint32_t)(*b->buf++) << 8; }
618-
if (b->buf < b->end) { b->value |= (uint32_t)(*b->buf++); }
627+
/* Added side: start with an empty window (count = -8) and let
 * vp8b_fill() do the loading. */
b->value = 0; b->count = -8;
628+
vp8b_fill(b);
619629
}
620630

621631
/* Decode one bit from reader `b` using the probability argument `prob`.
 *
 * `split` is derived from the current range and `prob`.  If `value` is at
 * least the aligned split, the result is 1 and both `range` and `value`
 * are reduced by the split; otherwise the result is 0 and `range` becomes
 * the split.  `range` is then doubled until it is at least 128.
 *
 * This span is a rendered diff hunk: a statement that follows an `NNN-`
 * marker belongs to the removed side, one that follows an `NNN+` marker
 * to the added side, and a two-number marker precedes an unchanged line. */
static INLINE int vp8b_get(vp8b *b, int prob)
622632
{
623633
uint32_t split = 1 + (((b->range - 1) * (uint32_t)prob) >> 8);
624-
/* Removed side: split aligned 8 bits up, in a 32-bit variable. */
uint32_t bigsplit = split << 8;
625-
int bit;
634+
/* Added side: split aligned to the top byte of the 64-bit `value`. */
uint64_t bigsplit = (uint64_t)split << 56;
635+
int bit, shift;
626636
if (b->value >= bigsplit)
627-
{ bit = 1; b->range -= split; b->value -= bigsplit; }
637+
{
638+
bit = 1; b->range -= split; b->value -= bigsplit;
639+
}
628640
else
629-
{ bit = 0; b->range = split; }
630-
/* Removed side: renormalise one bit at a time, OR-ing in a new input
 * byte (when one is available) after every eighth shift. */
while (b->range < 128)
631641
{
632-
b->value <<= 1;
633-
b->range <<= 1;
634-
if (++b->bit_count == 8)
635-
{
636-
b->bit_count = 0;
637-
if (b->buf < b->end)
638-
b->value |= *b->buf++;
639-
}
642+
bit = 0; b->range = split;
640643
}
644+
/* Added side: count how many doublings `range` needs, shift `value` by
 * that amount in one step, and refill only when `count` goes negative. */
shift = 0;
645+
while (b->range < 128) { b->range <<= 1; shift++; }
646+
b->value <<= shift;
647+
b->count -= shift;
648+
if (b->count < 0) vp8b_fill(b);
641649
return bit;
642650
}
643651

652+
644653
/* Read a single bit from `b`, passing 128 as the probability argument. */
static INLINE int vp8b_bit(vp8b *b)
{
   return vp8b_get(b, 128);
}
645654
/* Read an `n`-bit unsigned literal from `b`, most significant bit first.
 * Each bit is read with 128 as the probability argument.  Returns 0 when
 * `n` is not positive. */
static INLINE uint32_t vp8b_lit(vp8b *b, int n)
{
   uint32_t result = 0;
   int pos         = n;

   while (--pos >= 0)
      result |= (uint32_t)vp8b_get(b, 128) << pos;

   return result;
}
@@ -870,31 +879,18 @@ static const uint8_t kf_bmode_prob[10][10][9] = {
870879
{{190,80,35,99,180,80,126,54,45},{85,126,47,87,176,51,41,20,32},{101,75,128,139,118,146,116,128,85},{56,41,15,176,236,85,37,9,62},{146,36,19,30,171,255,97,27,20},{71,30,17,119,118,255,17,18,138},{101,38,60,138,55,70,43,26,142},{138,45,61,62,219,1,81,188,64},{32,41,20,117,151,142,20,21,163},{112,19,12,61,195,128,48,4,24}}
871880
};
872881

873-
/* Decode a B_PRED sub-block mode from the key-frame tree (RFC 6386 §12.1).
874-
* The tree is NOT a linear chain — it has a branch at node 3:
875-
* node0: p[0] → DC(0) | node1
876-
* node1: p[1] → TM(1) | node2
877-
* node2: p[2] → VE(2) | node3
878-
* node3: p[3] → node4{HE,RD,VR} | node6{LD,VL,HD,HU}
879-
* node4: p[4] → HE(3) | node5
880-
* node5: p[5] → RD(5) | VR(7)
881-
* node6: p[6] → LD(4) | node7
882-
* node7: p[7] → VL(8) | node8
883-
* node8: p[8] → HD(6) | HU(9)
884-
* Mode values: DC=0,TM=1,VE=2,HE=3,LD=4,RD=5,HD=6,VR=7,VL=8,HU=9 */
882+
/* Decode a B_PRED sub-block mode from the key-frame tree (RFC 6386 §12.1) */
885883
static int vp8_read_bmode(vp8b *br, int above, int left)
886884
{
887885
const uint8_t *p = kf_bmode_prob[above][left];
888886
if (!vp8b_get(br, p[0])) return 0; /* B_DC_PRED */
889887
if (!vp8b_get(br, p[1])) return 1; /* B_TM_PRED */
890888
if (!vp8b_get(br, p[2])) return 2; /* B_VE_PRED */
891889
if (!vp8b_get(br, p[3])) {
892-
/* Left subtree: HE, LD, RD */
893890
if (!vp8b_get(br, p[4])) return 3; /* B_HE_PRED */
894891
if (!vp8b_get(br, p[5])) return 5; /* B_LD_PRED */
895892
return 6; /* B_RD_PRED */
896893
} else {
897-
/* Right subtree: VR, VL, HD, HU */
898894
if (!vp8b_get(br, p[6])) return 4; /* B_VR_PRED */
899895
if (!vp8b_get(br, p[7])) return 7; /* B_VL_PRED */
900896
if (!vp8b_get(br, p[8])) return 8; /* B_HD_PRED */
@@ -935,7 +931,7 @@ static void vp8_pred4x4(uint8_t *d, int s, int m,
935931
memset(d+j*s,(uint8_t)v,4);
936932
}
937933
break;
938-
case 4: /* B_VR_PRED (4) */
934+
case 4: /* B_VR_PRED */
939935
d[3*s+0]=(uint8_t)((l[2]+2*l[1]+l[0]+2)>>2);
940936
d[2*s+0]=(uint8_t)((l[1]+2*l[0]+tl+2)>>2);
941937
d[1*s+0]=d[3*s+1]=(uint8_t)((l[0]+2*tl+a[0]+2)>>2);
@@ -947,7 +943,7 @@ static void vp8_pred4x4(uint8_t *d, int s, int m,
947943
d[0*s+3]=(uint8_t)((a[2]+a[3]+1)>>1);
948944
d[1*s+3]=(uint8_t)((a[1]+2*a[2]+a[3]+2)>>2);
949945
break;
950-
case 5: /* B_LD_PRED (5) */
946+
case 5: /* B_LD_PRED */
951947
d[0*s+0]=(uint8_t)((a[0]+2*a[1]+a[2]+2)>>2);
952948
d[0*s+1]=d[1*s+0]=(uint8_t)((a[1]+2*a[2]+a[3]+2)>>2);
953949
d[0*s+2]=d[1*s+1]=d[2*s+0]=(uint8_t)((a[2]+2*a[3]+a[4]+2)>>2);
@@ -956,7 +952,7 @@ static void vp8_pred4x4(uint8_t *d, int s, int m,
956952
d[2*s+3]=d[3*s+2]=(uint8_t)((a[5]+2*a[6]+a[7]+2)>>2);
957953
d[3*s+3]=(uint8_t)((a[6]+2*a[7]+a[7]+2)>>2);
958954
break;
959-
case 6: /* B_RD_PRED (6) */
955+
case 6: /* B_RD_PRED */
960956
d[3*s+0]=(uint8_t)((l[3]+2*l[2]+l[1]+2)>>2);
961957
d[2*s+0]=d[3*s+1]=(uint8_t)((l[2]+2*l[1]+l[0]+2)>>2);
962958
d[1*s+0]=d[2*s+1]=d[3*s+2]=(uint8_t)((l[1]+2*l[0]+tl+2)>>2);
@@ -966,45 +962,28 @@ static void vp8_pred4x4(uint8_t *d, int s, int m,
966962
d[0*s+3]=(uint8_t)((a[1]+2*a[2]+a[3]+2)>>2);
967963
break;
968964
case 7: /* B_VL_PRED */
969-
d[0*s+0]=(uint8_t)((a[0]+a[1]+1)>>1);
970-
d[1*s+0]=(uint8_t)((a[0]+2*a[1]+a[2]+2)>>2);
971-
d[0*s+1]=d[2*s+0]=(uint8_t)((a[1]+a[2]+1)>>1);
972-
d[1*s+1]=d[3*s+0]=(uint8_t)((a[1]+2*a[2]+a[3]+2)>>2);
973-
d[0*s+2]=d[2*s+1]=(uint8_t)((a[2]+a[3]+1)>>1);
974-
d[1*s+2]=d[3*s+1]=(uint8_t)((a[2]+2*a[3]+a[4]+2)>>2);
975-
d[0*s+3]=d[2*s+2]=(uint8_t)((a[3]+a[4]+1)>>1);
976-
d[1*s+3]=d[3*s+2]=(uint8_t)((a[3]+2*a[4]+a[5]+2)>>2);
977-
d[2*s+3]=(uint8_t)((a[4]+a[5]+1)>>1);
978-
d[3*s+3]=(uint8_t)((a[4]+2*a[5]+a[6]+2)>>2);
965+
d[0*s+0]=(uint8_t)((a[0]+a[1]+1)>>1); d[1*s+0]=(uint8_t)((a[0]+2*a[1]+a[2]+2)>>2);
966+
d[0*s+1]=d[2*s+0]=(uint8_t)((a[1]+a[2]+1)>>1); d[1*s+1]=d[3*s+0]=(uint8_t)((a[1]+2*a[2]+a[3]+2)>>2);
967+
d[0*s+2]=d[2*s+1]=(uint8_t)((a[2]+a[3]+1)>>1); d[1*s+2]=d[3*s+1]=(uint8_t)((a[2]+2*a[3]+a[4]+2)>>2);
968+
d[0*s+3]=d[2*s+2]=(uint8_t)((a[3]+a[4]+1)>>1); d[1*s+3]=d[3*s+2]=(uint8_t)((a[3]+2*a[4]+a[5]+2)>>2);
969+
d[2*s+3]=(uint8_t)((a[4]+a[5]+1)>>1); d[3*s+3]=(uint8_t)((a[4]+2*a[5]+a[6]+2)>>2);
979970
break;
980971
case 8: /* B_HD_PRED */
981-
d[3*s+0]=(uint8_t)((l[3]+l[2]+1)>>1);
982-
d[3*s+1]=(uint8_t)((l[3]+2*l[2]+l[1]+2)>>2);
983-
d[2*s+0]=d[3*s+2]=(uint8_t)((l[2]+l[1]+1)>>1);
984-
d[2*s+1]=d[3*s+3]=(uint8_t)((l[2]+2*l[1]+l[0]+2)>>2);
985-
d[1*s+0]=d[2*s+2]=(uint8_t)((l[1]+l[0]+1)>>1);
986-
d[1*s+1]=d[2*s+3]=(uint8_t)((l[1]+2*l[0]+tl+2)>>2);
987-
d[0*s+0]=d[1*s+2]=(uint8_t)((l[0]+tl+1)>>1);
988-
d[0*s+1]=d[1*s+3]=(uint8_t)((l[0]+2*tl+a[0]+2)>>2);
989-
d[0*s+2]=(uint8_t)((tl+2*a[0]+a[1]+2)>>2);
990-
d[0*s+3]=(uint8_t)((a[0]+2*a[1]+a[2]+2)>>2);
972+
d[3*s+0]=(uint8_t)((l[3]+l[2]+1)>>1); d[3*s+1]=(uint8_t)((l[3]+2*l[2]+l[1]+2)>>2);
973+
d[2*s+0]=d[3*s+2]=(uint8_t)((l[2]+l[1]+1)>>1); d[2*s+1]=d[3*s+3]=(uint8_t)((l[2]+2*l[1]+l[0]+2)>>2);
974+
d[1*s+0]=d[2*s+2]=(uint8_t)((l[1]+l[0]+1)>>1); d[1*s+1]=d[2*s+3]=(uint8_t)((l[1]+2*l[0]+tl+2)>>2);
975+
d[0*s+0]=d[1*s+2]=(uint8_t)((l[0]+tl+1)>>1); d[0*s+1]=d[1*s+3]=(uint8_t)((l[0]+2*tl+a[0]+2)>>2);
976+
d[0*s+2]=(uint8_t)((tl+2*a[0]+a[1]+2)>>2); d[0*s+3]=(uint8_t)((a[0]+2*a[1]+a[2]+2)>>2);
991977
break;
992978
case 9: /* B_HU_PRED */
993-
d[0*s+0]=(uint8_t)((l[0]+l[1]+1)>>1);
994-
d[0*s+1]=(uint8_t)((l[0]+2*l[1]+l[2]+2)>>2);
995-
d[0*s+2]=d[1*s+0]=(uint8_t)((l[1]+l[2]+1)>>1);
996-
d[0*s+3]=d[1*s+1]=(uint8_t)((l[1]+2*l[2]+l[3]+2)>>2);
997-
d[1*s+2]=d[2*s+0]=(uint8_t)((l[2]+l[3]+1)>>1);
998-
d[1*s+3]=d[2*s+1]=(uint8_t)((l[2]+2*l[3]+l[3]+2)>>2);
979+
d[0*s+0]=(uint8_t)((l[0]+l[1]+1)>>1); d[0*s+1]=(uint8_t)((l[0]+2*l[1]+l[2]+2)>>2);
980+
d[0*s+2]=d[1*s+0]=(uint8_t)((l[1]+l[2]+1)>>1); d[0*s+3]=d[1*s+1]=(uint8_t)((l[1]+2*l[2]+l[3]+2)>>2);
981+
d[1*s+2]=d[2*s+0]=(uint8_t)((l[2]+l[3]+1)>>1); d[1*s+3]=d[2*s+1]=(uint8_t)((l[2]+2*l[3]+l[3]+2)>>2);
999982
d[2*s+2]=d[2*s+3]=d[3*s+0]=d[3*s+1]=d[3*s+2]=d[3*s+3]=(uint8_t)l[3];
1000983
break;
1001984
default:
1002-
{ int sum=0;
1003-
for(i=0;i<4;i++) sum+=a[i]+l[i];
1004-
{ uint8_t dc=(uint8_t)((sum+4)>>3);
1005-
for(j=0;j<4;j++) memset(d+j*s,dc,4); }
1006-
break;
1007-
}
985+
{ int sum=0; for(i=0;i<4;i++) sum+=a[i]+l[i];
986+
{ uint8_t dc=(uint8_t)((sum+4)>>3); for(j=0;j<4;j++) memset(d+j*s,dc,4); } break; }
1008987
}
1009988
}
1010989

@@ -1060,7 +1039,7 @@ static uint32_t *vp8_decode(const uint8_t *data, size_t len,
10601039
mbw = (w+15) >> 4; mbh = (h+15) >> 4;
10611040
p0 = data + 10;
10621041
if ((size_t)(p0 - data) + p0s > len) return NULL;
1063-
vp8b_init(&br, p0, p0s);
1042+
vp8b_init(&br, p0, (size_t)(data + len - p0));
10641043

10651044
vp8b_bit(&br); vp8b_bit(&br); /* color_space, clamping */
10661045

@@ -1242,14 +1221,11 @@ static uint32_t *vp8_decode(const uint8_t *data, size_t len,
12421221
/* Read segment ID if segmentation is enabled */
12431222
if (seg_enabled)
12441223
{
1245-
/* Segment ID: balanced binary tree (VP8 spec) */
1246-
if (!vp8b_get(&br, seg_prob[0])) {
1247-
/* Left subtree: seg 0 or 1 */
1248-
seg_id = vp8b_get(&br, seg_prob[1]) ? 1 : 0;
1249-
} else {
1250-
/* Right subtree: seg 2 or 3 */
1251-
seg_id = vp8b_get(&br, seg_prob[2]) ? 3 : 2;
1252-
}
1224+
/* VP8 segment tree: prob[0] -> left(prob[1]->seg0/seg1) / right(prob[2]->seg2/seg3) */
1225+
if (vp8b_get(&br, seg_prob[0]))
1226+
seg_id = 2 + vp8b_get(&br, seg_prob[2]);
1227+
else
1228+
seg_id = vp8b_get(&br, seg_prob[1]);
12531229
}
12541230

12551231
/* Compute per-MB quantizer based on segment */
@@ -1269,11 +1245,11 @@ static uint32_t *vp8_decode(const uint8_t *data, size_t len,
12691245
{ int q2 = mb_qp + uvdc_dq; uv_dc_q = vp8_dc_qlut[q2<0?0:q2>127?127:q2]; if(uv_dc_q>132)uv_dc_q=132; }
12701246
{ int q2 = mb_qp + uvac_dq; uv_ac_q = vp8_ac_qlut[q2<0?0:q2>127?127:q2]; }
12711247

1272-
/* Y mode — keyframe tree: B_PRED first (RFC 6386 §11.2) */
1273-
if (!vp8b_get(&br, vp8_ymp[0])) ym = 4; /* B_PRED */
1274-
else if (!vp8b_get(&br, vp8_ymp[1])) ym = 0; /* DC_PRED */
1275-
else if (!vp8b_get(&br, vp8_ymp[2])) ym = 1; /* V_PRED */
1276-
else if (!vp8b_get(&br, vp8_ymp[3])) ym = 2; /* H_PRED */
1248+
/* Y mode */
1249+
if (!vp8b_get(&br, vp8_ymp[0])) ym = 4; /* B_PRED first */
1250+
else if (!vp8b_get(&br, vp8_ymp[1])) ym = 0;
1251+
else if (!vp8b_get(&br, vp8_ymp[2])) ym = 1;
1252+
else if (!vp8b_get(&br, vp8_ymp[3])) ym = 2;
12771253
else ym = 3; /* TM_PRED */
12781254

12791255
if (ym == 4)
@@ -1319,6 +1295,10 @@ static uint32_t *vp8_decode(const uint8_t *data, size_t len,
13191295
else if (!vp8b_get(&br, vp8_uvmp[2])) uvm = 2;
13201296
else uvm = 3;
13211297

1298+
/* Skip flag */
1299+
if (skip_enabled)
1300+
is_skip = vp8b_bit(&br);
1301+
13221302
/* Gather prediction context */
13231303
if (my > 0) {
13241304
memcpy(ay, yb+(my*16-1)*ys+mx*16, 16);

0 commit comments

Comments
 (0)