@@ -3921,100 +3921,124 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
         return 0;
 }
 
-static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
+static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
+                                       struct list_head *src_list)
 {
-        int i, nid = folio_nid(folio);
-        struct hstate *target_hstate;
-        struct page *subpage;
-        struct folio *inner_folio;
-        int rc = 0;
-
-        target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
+        long rc;
+        struct folio *folio, *next;
+        LIST_HEAD(dst_list);
+        LIST_HEAD(ret_list);
 
-        remove_hugetlb_folio(h, folio, false);
-        spin_unlock_irq(&hugetlb_lock);
-
-        /*
-         * If vmemmap already existed for folio, the remove routine above would
-         * have cleared the hugetlb folio flag.  Hence the folio is technically
-         * no longer a hugetlb folio.  hugetlb_vmemmap_restore_folio can only be
-         * passed hugetlb folios and will BUG otherwise.
-         */
-        if (folio_test_hugetlb(folio)) {
-                rc = hugetlb_vmemmap_restore_folio(h, folio);
-                if (rc) {
-                        /* Allocation of vmemmmap failed, we can not demote folio */
-                        spin_lock_irq(&hugetlb_lock);
-                        add_hugetlb_folio(h, folio, false);
-                        return rc;
-                }
-        }
-
-        /*
-         * Use destroy_compound_hugetlb_folio_for_demote for all huge page
-         * sizes as it will not ref count folios.
-         */
-        destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
+        rc = hugetlb_vmemmap_restore_folios(src, src_list, &ret_list);
+        list_splice_init(&ret_list, src_list);
 
         /*
          * Taking target hstate mutex synchronizes with set_max_huge_pages.
          * Without the mutex, pages added to target hstate could be marked
          * as surplus.
          *
-         * Note that we already hold h->resize_lock.  To prevent deadlock,
+         * Note that we already hold src->resize_lock.  To prevent deadlock,
          * use the convention of always taking larger size hstate mutex first.
          */
-        mutex_lock(&target_hstate->resize_lock);
-        for (i = 0; i < pages_per_huge_page(h);
-                                i += pages_per_huge_page(target_hstate)) {
-                subpage = folio_page(folio, i);
-                inner_folio = page_folio(subpage);
-                if (hstate_is_gigantic(target_hstate))
-                        prep_compound_gigantic_folio_for_demote(inner_folio,
-                                                        target_hstate->order);
-                else
-                        prep_compound_page(subpage, target_hstate->order);
-                folio_change_private(inner_folio, NULL);
-                prep_new_hugetlb_folio(target_hstate, inner_folio, nid);
-                free_huge_folio(inner_folio);
+        mutex_lock(&dst->resize_lock);
+
+        list_for_each_entry_safe(folio, next, src_list, lru) {
+                int i;
+
+                if (folio_test_hugetlb_vmemmap_optimized(folio))
+                        continue;
+
+                list_del(&folio->lru);
+                /*
+                 * Use destroy_compound_hugetlb_folio_for_demote for all huge page
+                 * sizes as it will not ref count folios.
+                 */
+                destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(src));
+
+                for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
+                        struct page *page = folio_page(folio, i);
+
+                        if (hstate_is_gigantic(dst))
+                                prep_compound_gigantic_folio_for_demote(page_folio(page),
+                                                                        dst->order);
+                        else
+                                prep_compound_page(page, dst->order);
+                        set_page_private(page, 0);
+
+                        init_new_hugetlb_folio(dst, page_folio(page));
+                        list_add(&page->lru, &dst_list);
+                }
         }
-        mutex_unlock(&target_hstate->resize_lock);
 
-        spin_lock_irq(&hugetlb_lock);
+        prep_and_add_allocated_folios(dst, &dst_list);
 
-        /*
-         * Not absolutely necessary, but for consistency update max_huge_pages
-         * based on pool changes for the demoted page.
-         */
-        h->max_huge_pages--;
-        target_hstate->max_huge_pages +=
-                pages_per_huge_page(h) / pages_per_huge_page(target_hstate);
+        mutex_unlock(&dst->resize_lock);
 
         return rc;
 }
 
-static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+static long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed,
+                                  unsigned long nr_to_demote)
         __must_hold(&hugetlb_lock)
 {
         int nr_nodes, node;
-        struct folio *folio;
+        struct hstate *dst;
+        long rc = 0;
+        long nr_demoted = 0;
 
         lockdep_assert_held(&hugetlb_lock);
 
         /* We should never get here if no demote order */
-        if (!h->demote_order) {
+        if (!src->demote_order) {
                 pr_warn("HugeTLB: NULL demote order passed to demote_pool_huge_page.\n");
                 return -EINVAL;        /* internal error */
         }
+        dst = size_to_hstate(PAGE_SIZE << src->demote_order);
 
-        for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
-                list_for_each_entry(folio, &h->hugepage_freelists[node], lru) {
+        for_each_node_mask_to_free(src, nr_nodes, node, nodes_allowed) {
+                LIST_HEAD(list);
+                struct folio *folio, *next;
+
+                list_for_each_entry_safe(folio, next, &src->hugepage_freelists[node], lru) {
                         if (folio_test_hwpoison(folio))
                                 continue;
-                        return demote_free_hugetlb_folio(h, folio);
+
+                        remove_hugetlb_folio(src, folio, false);
+                        list_add(&folio->lru, &list);
+
+                        if (++nr_demoted == nr_to_demote)
+                                break;
                 }
+
+                spin_unlock_irq(&hugetlb_lock);
+
+                rc = demote_free_hugetlb_folios(src, dst, &list);
+
+                spin_lock_irq(&hugetlb_lock);
+
+                list_for_each_entry_safe(folio, next, &list, lru) {
+                        list_del(&folio->lru);
+                        add_hugetlb_folio(src, folio, false);
+
+                        nr_demoted--;
+                }
+
+                if (rc < 0 || nr_demoted == nr_to_demote)
+                        break;
         }
 
+        /*
+         * Not absolutely necessary, but for consistency update max_huge_pages
+         * based on pool changes for the demoted page.
+         */
+        src->max_huge_pages -= nr_demoted;
+        dst->max_huge_pages += nr_demoted << (huge_page_order(src) - huge_page_order(dst));
+
+        if (rc < 0)
+                return rc;
+
+        if (nr_demoted)
+                return nr_demoted;
         /*
          * Only way to get here is if all pages on free lists are poisoned.
          * Return -EBUSY so that caller will not retry.
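
The hunk above restructures demotion from one folio at a time into a batched flow: pull a batch of folios off the per-node free list while holding hugetlb_lock, drop the lock once for the expensive vmemmap and compound-page work, then retake it to requeue whatever could not be demoted and account only the successes. The standalone userspace sketch below mirrors that shape for readers who want to see the pattern outside kernel context; it is illustration only, not part of the patch, and every identifier in it (struct item, demote_batch, process_one, pool_lock) is invented.

/*
 * Userspace sketch of the batching pattern: detach a batch from a shared
 * free list under a lock, do the expensive per-item work unlocked, then
 * retake the lock once to requeue failures and report only the successes.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
        int id;
        struct item *next;
};

static struct item *free_list;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the per-folio work; fail every third item to exercise requeueing. */
static int process_one(struct item *it)
{
        return (it->id % 3 == 0) ? -1 : 0;
}

static long demote_batch(unsigned long nr_to_demote)
{
        struct item *batch = NULL, *failed = NULL, *it;
        long nr_demoted = 0;

        /* Phase 1: collect up to nr_to_demote items while holding the lock. */
        pthread_mutex_lock(&pool_lock);
        while (free_list && nr_demoted < (long)nr_to_demote) {
                it = free_list;
                free_list = it->next;
                it->next = batch;
                batch = it;
                nr_demoted++;
        }
        pthread_mutex_unlock(&pool_lock);

        /* Phase 2: expensive work happens with the lock dropped. */
        while (batch) {
                it = batch;
                batch = it->next;
                if (process_one(it)) {
                        it->next = failed;      /* keep failures aside */
                        failed = it;
                } else {
                        free(it);               /* "demoted" successfully */
                }
        }

        /* Phase 3: retake the lock once and give failures back to the pool. */
        pthread_mutex_lock(&pool_lock);
        while (failed) {
                it = failed;
                failed = it->next;
                it->next = free_list;
                free_list = it;
                nr_demoted--;
        }
        pthread_mutex_unlock(&pool_lock);

        return nr_demoted;      /* like the patch: a count, not just 0 or -errno */
}

int main(void)
{
        for (int i = 1; i <= 10; i++) {
                struct item *it = malloc(sizeof(*it));

                it->id = i;
                it->next = free_list;
                free_list = it;
        }
        printf("demoted %ld items\n", demote_batch(8));
        return 0;
}
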
@@ -4249,6 +4273,8 @@ static ssize_t demote_store(struct kobject *kobj,
         spin_lock_irq(&hugetlb_lock);
 
         while (nr_demote) {
+                long rc;
+
                 /*
                  * Check for available pages to demote each time thorough the
                  * loop as demote_pool_huge_page will drop hugetlb_lock.
@@ -4261,11 +4287,13 @@ static ssize_t demote_store(struct kobject *kobj,
                 if (!nr_available)
                         break;
 
-                err = demote_pool_huge_page(h, n_mask);
-                if (err)
+                rc = demote_pool_huge_page(h, n_mask, nr_demote);
+                if (rc < 0) {
+                        err = rc;
                         break;
+                }
 
-                nr_demote--;
+                nr_demote -= rc;
         }
 
         spin_unlock_irq(&hugetlb_lock);
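
demote_store() above is the handler behind the per-hstate "demote" sysfs file, so the batched return value is ultimately driven by a page count written from userspace. A minimal way to exercise the path, assuming an x86-64 machine with 1 GB huge pages reserved and root privileges (the hugepages-1048576kB directory name varies with architecture and configured page sizes), might look like:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Ask the kernel to demote two 1 GB huge pages into the next smaller size. */
        const char *path = "/sys/kernel/mm/hugepages/hugepages-1048576kB/demote";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, "2", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}
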