Skip to content

Commit db93de5

Browse files
hoshinolina authored and marcan committed
kboot_gpu: Add proper max-power and leakage calculations for t8112
This uses a similar algorithm to t600x, but with different constants and exponents. The current limit also engages here, so add it, and add the t600x one too (even though it is never hit). Signed-off-by: Asahi Lina <[email protected]>
1 parent d20d89b commit db93de5

1 file changed

Lines changed: 68 additions & 42 deletions

File tree

src/kboot_gpu.c

Lines changed: 68 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -83,10 +83,7 @@ static void load_fuses(float *out, u32 count, u64 base, u32 start, u32 width, fl
8383

8484
static u32 t8103_pwr_scale[] = {0, 63, 80, 108, 150, 198, 210};
8585

86-
// TODO this isn't a static table any more
87-
static u32 t8112_pwr_scale[] = {0, 66, 92, 119, 153, 184, 214, 240, 240};
88-
89-
static int calc_power_t81xx(u32 count, u32 table_count, const struct perf_state *core,
86+
static int calc_power_t8103(u32 count, u32 table_count, const struct perf_state *core,
9087
const struct perf_state *sram, u32 *max_pwr, float *core_leak,
9188
float *sram_leak)
9289
{
@@ -104,10 +101,6 @@ static int calc_power_t81xx(u32 count, u32 table_count, const struct perf_state
104101
pwr_scale_count = ARRAY_SIZE(t8103_pwr_scale);
105102
max_cores = 8;
106103
break;
107-
case T8112:
108-
pwr_scale = t8112_pwr_scale;
109-
pwr_scale_count = ARRAY_SIZE(t8112_pwr_scale);
110-
break;
111104
default:
112105
bail("ADT: GPU: Unsupported chip\n");
113106
}
@@ -137,51 +130,77 @@ static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state
137130
const struct perf_state *sram, u32 *max_pwr, float *core_leak,
138131
float *sram_leak)
139132
{
140-
const float s_sram = 4.3547606;
141-
const float k_sram = 0.024927923;
133+
float s_sram, k_sram, s_core, k_core;
134+
float dk_core, dk_sram;
135+
float imax = 1000;
142136

143-
// macOS difference: macOS uses a misbehaved piecewise function here
144-
// Since it's obviously wrong, let's just use only the first component
145-
const float s_core = 1.48461742;
146-
const float k_core = 0.39013552;
137+
u32 nclusters = 0;
138+
u32 ncores = 0;
139+
u32 core_count[MAX_CLUSTERS];
147140

148-
const float dk_core = 8.558;
149-
const float dk_sram = 0.05;
141+
bool simple_exps = false;
142+
bool adjust_leakages = true;
150143

151-
u32 nclusters = 0;
152144
switch (chip_id) {
153-
case T6000:
154-
nclusters = 2;
155-
break;
145+
case T6002:
146+
nclusters += 4;
147+
load_fuses(core_leak + 4, 4, 0x22922bc1b8, 25, 13, 2, 2, true);
148+
load_fuses(sram_leak + 4, 4, 0x22922bc1cc, 4, 9, 1, 1, true);
149+
// fallthrough
156150
case T6001:
157-
nclusters = 4;
151+
nclusters += 2;
152+
case T6000:
153+
nclusters += 2;
154+
load_fuses(core_leak + 0, min(4, nclusters), 0x2922bc1b8, 25, 13, 2, 2, false);
155+
load_fuses(sram_leak + 0, min(4, nclusters), 0x2922bc1cc, 4, 9, 1, 1, false);
156+
157+
s_sram = 4.3547606;
158+
k_sram = 0.024927923;
159+
// macOS difference: macOS uses a misbehaved piecewise function here
160+
// Since it's obviously wrong, let's just use only the first component
161+
s_core = 1.48461742;
162+
k_core = 0.39013552;
163+
dk_core = 8.558;
164+
dk_sram = 0.05;
165+
166+
ncores = 8;
167+
adjust_leakages = true;
168+
imax = 26.0;
158169
break;
159-
case T6002:
160-
nclusters = 8;
170+
case T8112:
171+
nclusters = 1;
172+
load_fuses(core_leak, 1, 0x23d2c84dc, 30, 13, 2, 2, false);
173+
load_fuses(sram_leak, 1, 0x23d2c84b0, 15, 9, 1, 1, false);
174+
175+
s_sram = 3.61619841;
176+
k_sram = 0.0529281;
177+
// macOS difference: macOS uses a misbehaved piecewise function here
178+
// Since it's obviously wrong, let's just use only the first component
179+
s_core = 1.21356187;
180+
k_core = 0.43328839;
181+
dk_core = 9.83196;
182+
dk_sram = 0.07828;
183+
184+
simple_exps = true;
185+
ncores = 10;
186+
adjust_leakages = false; // pre-adjusted?
187+
imax = 24.0;
161188
break;
162189
}
163190

164-
u32 core_count[MAX_CLUSTERS];
165-
166-
if (get_core_counts(core_count, nclusters, 8))
191+
if (get_core_counts(core_count, nclusters, ncores))
167192
return -1;
168193

169-
load_fuses(core_leak + 0, min(4, nclusters), 0x2922bc1b8, 25, 13, 2, 2, false);
170-
load_fuses(sram_leak + 0, min(4, nclusters), 0x2922bc1cc, 4, 9, 1, 1, false);
171-
172-
if (nclusters == 8) {
173-
load_fuses(core_leak + 4, 4, 0x22922bc1b8, 25, 13, 2, 2, true);
174-
load_fuses(sram_leak + 4, 4, 0x22922bc1cc, 4, 9, 1, 1, true);
175-
}
176-
177194
printf("FDT: GPU: Core counts: ");
178195
for (u32 i = 0; i < nclusters; i++) {
179196
printf("%d ", core_count[i]);
180197
}
181198
printf("\n");
182199

183-
adjust_leakage(core_leak, nclusters, core_count, 8, 0.0825);
184-
adjust_leakage(sram_leak, nclusters, core_count, 8, 0.2247);
200+
if (adjust_leakages) {
201+
adjust_leakage(core_leak, nclusters, core_count, ncores, 0.0825);
202+
adjust_leakage(sram_leak, nclusters, core_count, ncores, 0.2247);
203+
}
185204

186205
if (table_count != nclusters)
187206
bail("ADT: GPU: expected %d perf state tables but got %d\n", nclusters, table_count);
@@ -204,17 +223,24 @@ static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state
204223
expf(core[idx].volt / 1000.f * s_core);
205224

206225
float sbase = sram[idx].volt / 750.f;
207-
float sram_v_p = sbase * sbase * sbase;
226+
float sram_v_p;
227+
if (simple_exps)
228+
sram_v_p = sbase * sbase; // v ^ 2
229+
else
230+
sram_v_p = sbase * sbase * sbase; // v ^ 3
208231
mw += dk_sram * (sram[idx].freq / 1000000.f) * sram_v_p;
209232

210233
float cbase = core[idx].volt / 750.f;
211234
float core_v_p;
212-
if (core[idx].volt > 750)
213-
core_v_p = cbase * cbase * cbase; // v ^ 3
214-
else
235+
if (simple_exps || core[idx].volt < 750)
215236
core_v_p = cbase * cbase; // v ^ 2
237+
else
238+
core_v_p = cbase * cbase * cbase; // v ^ 3
216239
mw += dk_core * (core[idx].freq / 1000000.f) * core_v_p;
217240

241+
if (mw > imax * core[idx].volt)
242+
mw = imax * core[idx].volt;
243+
218244
total_mw += mw;
219245
}
220246

@@ -280,12 +306,12 @@ int dt_set_gpu(void *dt)
280306

281307
switch (chip_id) {
282308
case T8103:
283-
case T8112:
284-
calc_power = calc_power_t81xx;
309+
calc_power = calc_power_t8103;
285310
break;
286311
case T6000:
287312
case T6001:
288313
case T6002:
314+
case T8112:
289315
calc_power = calc_power_t600x;
290316
break;
291317
default:

0 commit comments

Comments
 (0)