33#include "kboot.h"
44#include "adt.h"
55#include "assert.h"
6+ #include "math.h"
7+ #include "pmgr.h"
68#include "soc.h"
79#include "utils.h"
810
@@ -22,49 +24,129 @@ struct perf_state {
2224 u32 volt ;
2325};
2426
27+ static int get_core_counts (u32 * count , u32 nclusters , u32 ncores )
28+ {
29+ u64 base ;
30+ pmgr_adt_power_enable ("/arm-io/sgx" );
31+
32+ int adt_sgx_path [8 ];
33+ if (adt_path_offset_trace (adt , "/arm-io/sgx" , adt_sgx_path ) < 0 )
34+ bail ("ADT: GPU: Failed to get sgx\n" );
35+
36+ if (adt_get_reg (adt , adt_sgx_path , "reg" , 0 , & base , NULL ) < 0 )
37+ bail ("ADT: GPU: Failed to get sgx reg 0\n" );
38+
39+ u32 cores_lo = read32 (base + 0xd01500 );
40+ u32 cores_hi = read32 (base + 0xd01514 );
41+
42+ u64 cores = (((u64 )cores_hi ) << 32 ) | cores_lo ;
43+
44+ for (u32 i = 0 ; i < nclusters ; i ++ ) {
45+ count [i ] = __builtin_popcount (cores & MASK (ncores ));
46+ cores >>= ncores ;
47+ }
48+
49+ return 0 ;
50+ }
51+
52+ static void adjust_leakage (float * val , u32 clusters , u32 * cores , u32 max , float uncore_fraction )
53+ {
54+ for (u32 i = 0 ; i < clusters ; i ++ ) {
55+ float uncore = val [i ] * uncore_fraction ;
56+ float core = val [i ] - uncore ;
57+
58+ val [i ] = uncore + (cores [i ] / (float )max ) * core ;
59+ }
60+ }
61+
62+ static void load_fuses (float * out , u32 count , u64 base , u32 start , u32 width , float scale ,
63+ float offset , bool flip )
64+ {
65+ for (u32 i = 0 ; i < count ; i ++ ) {
66+ base += (start / 32 ) * 4 ;
67+ start &= 31 ;
68+
69+ u32 low = read32 (base );
70+ u32 high = read32 (base + 4 );
71+ u32 val = (((((u64 )high ) << 32 ) | low ) >> start ) & MASK (width );
72+
73+ float fval = (float )val * scale + offset ;
74+
75+ if (flip )
76+ out [count - i - 1 ] = fval ;
77+ else
78+ out [i ] = fval ;
79+
80+ start += width ;
81+ }
82+ }
83+
2584static u32 t8103_pwr_scale [] = {0 , 63 , 80 , 108 , 150 , 198 , 210 };
2685
2786// TODO this isn't a static table any more
2887static u32 t8112_pwr_scale [] = {0 , 66 , 92 , 119 , 153 , 184 , 214 , 240 , 240 };
2988
30- static int calc_power_t81xx (int sgx , u32 count , u32 table_count ,
31- const struct perf_state * perf_states , u32 * max_pwr )
89+ static int calc_power_t81xx (u32 count , u32 table_count , const struct perf_state * core ,
90+ const struct perf_state * sram , u32 * max_pwr , float * core_leak ,
91+ float * sram_leak )
3292{
33- UNUSED (sgx );
93+ UNUSED (sram );
94+ UNUSED (core_leak );
95+ UNUSED (sram_leak );
3496 u32 * pwr_scale ;
3597 u32 pwr_scale_count ;
98+ u32 core_count ;
99+ u32 max_cores ;
36100
37101 switch (chip_id ) {
38102 case T8103 :
39103 pwr_scale = t8103_pwr_scale ;
40104 pwr_scale_count = ARRAY_SIZE (t8103_pwr_scale );
105+ max_cores = 8 ;
41106 break ;
42107 case T8112 :
43108 pwr_scale = t8112_pwr_scale ;
44109 pwr_scale_count = ARRAY_SIZE (t8112_pwr_scale );
45110 break ;
111+ default :
112+ bail ("ADT: GPU: Unsupported chip\n" );
46113 }
47114
115+ if (get_core_counts (& core_count , 1 , max_cores ))
116+ return -1 ;
117+
48118 if (table_count != 1 )
49119 bail ("ADT: GPU: expected 1 perf state table but got %d\n" , table_count );
50120
51121 if (count != pwr_scale_count )
52122 bail ("ADT: GPU: expected %d perf states but got %d\n" , pwr_scale_count , count );
53123
54124 for (u32 i = 0 ; i < pwr_scale_count ; i ++ )
55- max_pwr [i ] = (u32 )perf_states [i ].volt * (u32 )pwr_scale [i ] * 100 ;
125+ max_pwr [i ] = (u32 )core [i ].volt * (u32 )pwr_scale [i ] * 100 ;
126+
127+ core_leak [0 ] = 1000.0 ;
128+ sram_leak [0 ] = 45.0 ;
129+
130+ adjust_leakage (core_leak , 1 , & core_count , max_cores , 0.12 );
131+ adjust_leakage (sram_leak , 1 , & core_count , max_cores , 0.2 );
56132
57133 return 0 ;
58134}
59135
60- // TODO
61- static u32 t6000_pwr_scale [] = {0 , 15 , 19 , 25 , 34 , 50 , 100 };
62-
63- static int calc_power_t600x (int sgx , u32 count , u32 table_count ,
64- const struct perf_state * perf_states , u32 * max_pwr )
136+ static int calc_power_t600x (u32 count , u32 table_count , const struct perf_state * core ,
137+ const struct perf_state * sram , u32 * max_pwr , float * core_leak ,
138+ float * sram_leak )
65139{
66- UNUSED (sgx );
67- UNUSED (perf_states );
140+ const float s_sram = 4.3547606 ;
141+ const float k_sram = 0.024927923 ;
142+
143+ // macOS difference: macOS uses a misbehaved piecewise function here
144+ // Since it's obviously wrong, let's just use only the first component
145+ const float s_core = 1.48461742 ;
146+ const float k_core = 0.39013552 ;
147+
148+ const float dk_core = 8.558 ;
149+ const float dk_sram = 0.05 ;
68150
69151 u32 nclusters = 0 ;
70152 switch (chip_id ) {
@@ -79,16 +161,64 @@ static int calc_power_t600x(int sgx, u32 count, u32 table_count,
79161 break ;
80162 }
81163
82- u32 pwr_scale_count = ARRAY_SIZE (t6000_pwr_scale );
164+ u32 core_count [MAX_CLUSTERS ];
165+
166+ if (get_core_counts (core_count , nclusters , 8 ))
167+ return -1 ;
168+
169+ load_fuses (core_leak + 0 , min (4 , nclusters ), 0x2922bc1b8 , 25 , 13 , 2 , 2 , false);
170+ load_fuses (sram_leak + 0 , min (4 , nclusters ), 0x2922bc1cc , 4 , 9 , 1 , 1 , false);
171+
172+ if (nclusters == 8 ) {
173+ load_fuses (core_leak + 4 , 4 , 0x22922bc1b8 , 25 , 13 , 2 , 2 , true);
174+ load_fuses (sram_leak + 4 , 4 , 0x22922bc1cc , 4 , 9 , 1 , 1 , true);
175+ }
176+
177+ printf ("FDT: GPU: Core counts: " );
178+ for (u32 i = 0 ; i < nclusters ; i ++ ) {
179+ printf ("%d " , core_count [i ]);
180+ }
181+ printf ("\n" );
182+
183+ adjust_leakage (core_leak , nclusters , core_count , 8 , 0.0825 );
184+ adjust_leakage (sram_leak , nclusters , core_count , 8 , 0.2247 );
83185
84186 if (table_count != nclusters )
85187 bail ("ADT: GPU: expected %d perf state tables but got %d\n" , nclusters , table_count );
86188
87- if (count != pwr_scale_count )
88- bail ("ADT: GPU: expected %d perf states but got %d\n" , pwr_scale_count , count );
189+ max_pwr [0 ] = 0 ;
190+
191+ for (u32 i = 1 ; i < count ; i ++ ) {
192+ u32 total_mw = 0 ;
193+
194+ for (u32 j = 0 ; j < nclusters ; j ++ ) {
195+ // macOS difference: macOS truncates Hz to integer MHz before doing this math.
196+ // That's probably wrong, so let's not do that.
89197
90- for (u32 i = 0 ; i < pwr_scale_count ; i ++ ) {
91- max_pwr [i ] = t6000_pwr_scale [i ] * 1667430 * nclusters / 8 ;
198+ float mw = 0 ;
199+ size_t idx = j * count + i ;
200+
201+ mw += sram [idx ].volt / 1000.f * sram_leak [j ] * k_sram *
202+ expf (sram [idx ].volt / 1000.f * s_sram );
203+ mw += core [idx ].volt / 1000.f * core_leak [j ] * k_core *
204+ expf (core [idx ].volt / 1000.f * s_core );
205+
206+ float sbase = sram [idx ].volt / 750.f ;
207+ float sram_v_p = sbase * sbase * sbase ;
208+ mw += dk_sram * (sram [idx ].freq / 1000000.f ) * sram_v_p ;
209+
210+ float cbase = core [idx ].volt / 750.f ;
211+ float core_v_p ;
212+ if (core [idx ].volt > 750 )
213+ core_v_p = cbase * cbase * cbase ; // v ^ 3
214+ else
215+ core_v_p = cbase * cbase ; // v ^ 2
216+ mw += dk_core * (core [idx ].freq / 1000000.f ) * core_v_p ;
217+
218+ total_mw += mw ;
219+ }
220+
221+ max_pwr [i ] = total_mw * 1000 ;
92222 }
93223
94224 return 0 ;
@@ -122,10 +252,31 @@ static int dt_set_region(void *dt, int sgx, const char *name, const char *path)
122252 return 0 ;
123253}
124254
255+ int fdt_set_float_array (void * dt , int node , const char * name , float * val , int count )
256+ {
257+ fdt32_t data [MAX_CLUSTERS ];
258+
259+ if (count > MAX_CLUSTERS )
260+ bail ("FDT: GPU: fdt_set_float_array() with too many values\n" );
261+
262+ memcpy (data , val , sizeof (float ) * count );
263+ for (int i = 0 ; i < count ; i ++ ) {
264+ data [i ] = cpu_to_fdt32 (data [i ]);
265+ }
266+
267+ if (fdt_setprop_inplace (dt , node , name , data , sizeof (u32 ) * count ))
268+ bail ("FDT: GPU: Failed to set %s\n" , name );
269+
270+ return 0 ;
271+ }
272+
125273int dt_set_gpu (void * dt )
126274{
127- int (* calc_power )(int sgx , u32 count , u32 table_count , const struct perf_state * perf ,
128- u32 * max_pwr );
275+ int (* calc_power )(u32 count , u32 table_count , const struct perf_state * perf ,
276+ const struct perf_state * sram , u32 * max_pwr , float * core_leak ,
277+ float * sram_leak );
278+
279+ printf ("FDT: GPU: Initializing GPU info\n" );
129280
130281 switch (chip_id ) {
131282 case T8103 :
@@ -177,16 +328,44 @@ int dt_set_gpu(void *dt)
177328 bail ("ADT: GPU: perf-state-table-count too large\n" );
178329
179330 u32 perf_states_len ;
180- const struct perf_state * perf_states ;
331+ const struct perf_state * perf_states , * perf_states_sram ;
181332
182333 perf_states = adt_getprop (adt , sgx , "perf-states" , & perf_states_len );
183334 if (!perf_states ||
184335 perf_states_len != sizeof (* perf_states ) * perf_state_count * perf_state_table_count )
185336 bail ("ADT: GPU: invalid perf-states length\n" );
186337
338+ perf_states_sram = adt_getprop (adt , sgx , "perf-states-sram" , & perf_states_len );
339+ if (perf_states_sram &&
340+ perf_states_len != sizeof (* perf_states ) * perf_state_count * perf_state_table_count )
341+ bail ("ADT: GPU: invalid perf-states-sram length\n" );
342+
187343 u32 max_pwr [MAX_PSTATES ];
344+ float core_leak [MAX_CLUSTERS ];
345+ float sram_leak [MAX_CLUSTERS ];
346+
347+ if (calc_power (perf_state_count , perf_state_table_count , perf_states , perf_states_sram , max_pwr ,
348+ core_leak , sram_leak ))
349+ return -1 ;
350+
351+ printf ("FDT: GPU: Max power table: " );
352+ for (u32 i = 0 ; i < perf_state_count ; i ++ ) {
353+ printf ("%d " , max_pwr [i ]);
354+ }
355+ printf ("\nFDT: GPU: Core leakage table: " );
356+ for (u32 i = 0 ; i < perf_state_table_count ; i ++ ) {
357+ printf ("%d.%03d " , (int )core_leak [i ], ((int )(core_leak [i ] * 1000 ) % 1000 ));
358+ }
359+ printf ("\nFDT: GPU: SRAM leakage table: " );
360+ for (u32 i = 0 ; i < perf_state_table_count ; i ++ ) {
361+ printf ("%d.%03d " , (int )sram_leak [i ], ((int )(sram_leak [i ] * 1000 ) % 1000 ));
362+ }
363+ printf ("\n" );
364+
365+ if (fdt_set_float_array (dt , gpu , "apple,core-leak-coef" , core_leak , perf_state_table_count ))
366+ return -1 ;
188367
189- if (calc_power ( sgx , perf_state_count , perf_state_table_count , perf_states , max_pwr ))
368+ if (fdt_set_float_array ( dt , gpu , "apple,sram-leak-coef" , sram_leak , perf_state_table_count ))
190369 return -1 ;
191370
192371 u32 i = 0 ;
0 commit comments