@@ -83,10 +83,7 @@ static void load_fuses(float *out, u32 count, u64 base, u32 start, u32 width, fl
8383
8484static u32 t8103_pwr_scale [] = {0 , 63 , 80 , 108 , 150 , 198 , 210 };
8585
86- // TODO this isn't a static table any more
87- static u32 t8112_pwr_scale [] = {0 , 66 , 92 , 119 , 153 , 184 , 214 , 240 , 240 };
88-
89- static int calc_power_t81xx (u32 count , u32 table_count , const struct perf_state * core ,
86+ static int calc_power_t8103 (u32 count , u32 table_count , const struct perf_state * core ,
9087 const struct perf_state * sram , u32 * max_pwr , float * core_leak ,
9188 float * sram_leak )
9289{
@@ -104,10 +101,6 @@ static int calc_power_t81xx(u32 count, u32 table_count, const struct perf_state
104101 pwr_scale_count = ARRAY_SIZE (t8103_pwr_scale );
105102 max_cores = 8 ;
106103 break ;
107- case T8112 :
108- pwr_scale = t8112_pwr_scale ;
109- pwr_scale_count = ARRAY_SIZE (t8112_pwr_scale );
110- break ;
111104 default :
112105 bail ("ADT: GPU: Unsupported chip\n" );
113106 }
@@ -137,51 +130,77 @@ static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state
137130 const struct perf_state * sram , u32 * max_pwr , float * core_leak ,
138131 float * sram_leak )
139132{
140- const float s_sram = 4.3547606 ;
141- const float k_sram = 0.024927923 ;
133+ float s_sram , k_sram , s_core , k_core ;
134+ float dk_core , dk_sram ;
135+ float imax = 1000 ;
142136
143- // macOS difference: macOS uses a misbehaved piecewise function here
144- // Since it's obviously wrong, let's just use only the first component
145- const float s_core = 1.48461742 ;
146- const float k_core = 0.39013552 ;
137+ u32 nclusters = 0 ;
138+ u32 ncores = 0 ;
139+ u32 core_count [MAX_CLUSTERS ];
147140
148- const float dk_core = 8.558 ;
149- const float dk_sram = 0.05 ;
141+ bool simple_exps = false ;
142+ bool adjust_leakages = true ;
150143
151- u32 nclusters = 0 ;
152144 switch (chip_id ) {
153- case T6000 :
154- nclusters = 2 ;
155- break ;
145+ case T6002 :
146+ nclusters += 4 ;
147+ load_fuses (core_leak + 4 , 4 , 0x22922bc1b8 , 25 , 13 , 2 , 2 , true);
148+ load_fuses (sram_leak + 4 , 4 , 0x22922bc1cc , 4 , 9 , 1 , 1 , true);
149+ // fallthrough
156150 case T6001 :
157- nclusters = 4 ;
151+ nclusters += 2 ;
152+ case T6000 :
153+ nclusters += 2 ;
154+ load_fuses (core_leak + 0 , min (4 , nclusters ), 0x2922bc1b8 , 25 , 13 , 2 , 2 , false);
155+ load_fuses (sram_leak + 0 , min (4 , nclusters ), 0x2922bc1cc , 4 , 9 , 1 , 1 , false);
156+
157+ s_sram = 4.3547606 ;
158+ k_sram = 0.024927923 ;
159+ // macOS difference: macOS uses a misbehaved piecewise function here
160+ // Since it's obviously wrong, let's just use only the first component
161+ s_core = 1.48461742 ;
162+ k_core = 0.39013552 ;
163+ dk_core = 8.558 ;
164+ dk_sram = 0.05 ;
165+
166+ ncores = 8 ;
167+ adjust_leakages = true;
168+ imax = 26.0 ;
158169 break ;
159- case T6002 :
160- nclusters = 8 ;
170+ case T8112 :
171+ nclusters = 1 ;
172+ load_fuses (core_leak , 1 , 0x23d2c84dc , 30 , 13 , 2 , 2 , false);
173+ load_fuses (sram_leak , 1 , 0x23d2c84b0 , 15 , 9 , 1 , 1 , false);
174+
175+ s_sram = 3.61619841 ;
176+ k_sram = 0.0529281 ;
177+ // macOS difference: macOS uses a misbehaved piecewise function here
178+ // Since it's obviously wrong, let's just use only the first component
179+ s_core = 1.21356187 ;
180+ k_core = 0.43328839 ;
181+ dk_core = 9.83196 ;
182+ dk_sram = 0.07828 ;
183+
184+ simple_exps = true;
185+ ncores = 10 ;
186+ adjust_leakages = false; // pre-adjusted?
187+ imax = 24.0 ;
161188 break ;
162189 }
163190
164- u32 core_count [MAX_CLUSTERS ];
165-
166- if (get_core_counts (core_count , nclusters , 8 ))
191+ if (get_core_counts (core_count , nclusters , ncores ))
167192 return -1 ;
168193
169- load_fuses (core_leak + 0 , min (4 , nclusters ), 0x2922bc1b8 , 25 , 13 , 2 , 2 , false);
170- load_fuses (sram_leak + 0 , min (4 , nclusters ), 0x2922bc1cc , 4 , 9 , 1 , 1 , false);
171-
172- if (nclusters == 8 ) {
173- load_fuses (core_leak + 4 , 4 , 0x22922bc1b8 , 25 , 13 , 2 , 2 , true);
174- load_fuses (sram_leak + 4 , 4 , 0x22922bc1cc , 4 , 9 , 1 , 1 , true);
175- }
176-
177194 printf ("FDT: GPU: Core counts: " );
178195 for (u32 i = 0 ; i < nclusters ; i ++ ) {
179196 printf ("%d " , core_count [i ]);
180197 }
181198 printf ("\n" );
182199
183- adjust_leakage (core_leak , nclusters , core_count , 8 , 0.0825 );
184- adjust_leakage (sram_leak , nclusters , core_count , 8 , 0.2247 );
200+ if (adjust_leakages ) {
201+ adjust_leakage (core_leak , nclusters , core_count , ncores , 0.0825 );
202+ adjust_leakage (sram_leak , nclusters , core_count , ncores , 0.2247 );
203+ }
185204
186205 if (table_count != nclusters )
187206 bail ("ADT: GPU: expected %d perf state tables but got %d\n" , nclusters , table_count );
@@ -204,17 +223,24 @@ static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state
204223 expf (core [idx ].volt / 1000.f * s_core );
205224
206225 float sbase = sram [idx ].volt / 750.f ;
207- float sram_v_p = sbase * sbase * sbase ;
226+ float sram_v_p ;
227+ if (simple_exps )
228+ sram_v_p = sbase * sbase ; // v ^ 2
229+ else
230+ sram_v_p = sbase * sbase * sbase ; // v ^ 3
208231 mw += dk_sram * (sram [idx ].freq / 1000000.f ) * sram_v_p ;
209232
210233 float cbase = core [idx ].volt / 750.f ;
211234 float core_v_p ;
212- if (core [idx ].volt > 750 )
213- core_v_p = cbase * cbase * cbase ; // v ^ 3
214- else
235+ if (simple_exps || core [idx ].volt < 750 )
215236 core_v_p = cbase * cbase ; // v ^ 2
237+ else
238+ core_v_p = cbase * cbase * cbase ; // v ^ 3
216239 mw += dk_core * (core [idx ].freq / 1000000.f ) * core_v_p ;
217240
241+ if (mw > imax * core [idx ].volt )
242+ mw = imax * core [idx ].volt ;
243+
218244 total_mw += mw ;
219245 }
220246
@@ -280,12 +306,12 @@ int dt_set_gpu(void *dt)
280306
281307 switch (chip_id ) {
282308 case T8103 :
283- case T8112 :
284- calc_power = calc_power_t81xx ;
309+ calc_power = calc_power_t8103 ;
285310 break ;
286311 case T6000 :
287312 case T6001 :
288313 case T6002 :
314+ case T8112 :
289315 calc_power = calc_power_t600x ;
290316 break ;
291317 default :
0 commit comments