diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index aebc710f0d6a..b7732c6665aa 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -165,6 +165,15 @@ static inline bool slot_allocated(struct zram *zram, u32 index)
 		test_slot_flag(zram, index, ZRAM_WB);
 }
 
+#define ZRAM_FLAGS_TO_CLEAR_ON_FREE	(BIT(ZRAM_IDLE) | \
+					 BIT(ZRAM_INCOMPRESSIBLE) | \
+					 BIT(ZRAM_PP_SLOT))
+
+static inline void clear_slot_flags_on_free(struct zram *zram, u32 index)
+{
+	zram->table[index].attr.flags &= ~ZRAM_FLAGS_TO_CLEAR_ON_FREE;
+}
+
 static inline void set_slot_comp_priority(struct zram *zram, u32 index,
 					  u32 prio)
 {
@@ -2000,17 +2009,20 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 	return true;
 }
 
-static void slot_free(struct zram *zram, u32 index)
+/*
+ * Clear slot metadata and extract the zsmalloc handle for freeing.
+ * Returns the handle that needs to be freed via zs_free(), or 0 if
+ * no zsmalloc freeing is needed (e.g. same-filled or writeback slots).
+ */
+static unsigned long slot_free_extract(struct zram *zram, u32 index)
 {
-	unsigned long handle;
+	unsigned long handle = 0;
 
 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
 	zram->table[index].attr.ac_time = 0;
 #endif
 
-	clear_slot_flag(zram, index, ZRAM_IDLE);
-	clear_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE);
-	clear_slot_flag(zram, index, ZRAM_PP_SLOT);
+	clear_slot_flags_on_free(zram, index);
 	set_slot_comp_priority(zram, index, 0);
 
 	if (test_slot_flag(zram, index, ZRAM_HUGE)) {
@@ -2041,9 +2053,7 @@
 
 	handle = get_slot_handle(zram, index);
 	if (!handle)
-		return;
-
-	zs_free(zram->mem_pool, handle);
+		return 0;
 
 	atomic64_sub(get_slot_size(zram, index),
 		     &zram->stats.compr_data_size);
@@ -2051,6 +2061,15 @@
 	atomic64_dec(&zram->stats.pages_stored);
 	set_slot_handle(zram, index, 0);
 	set_slot_size(zram, index, 0);
+
+	return handle;
+}
+
+static void slot_free(struct zram *zram, u32 index)
+{
+	unsigned long handle = slot_free_extract(zram, index);
+
+	zs_free(zram->mem_pool, handle);
 }
 
 static int read_same_filled_page(struct zram *zram, struct page *page,
@@ -2797,7 +2816,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
 		return;
 	}
 
-	slot_free(zram, index);
+	zs_free_deferred(zram->mem_pool, slot_free_extract(zram, index));
 	slot_unlock(zram, index);
 }
 
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 478410c880b1..1e5ac1a39d41 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -30,6 +30,8 @@ void zs_destroy_pool(struct zs_pool *pool);
 
 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags,
 			const int nid);
 void zs_free(struct zs_pool *pool, unsigned long obj);
+void zs_free_deferred(struct zs_pool *pool, unsigned long handle);
+void zs_free_deferred_flush(struct zs_pool *pool);
 
 size_t zs_huge_class_size(struct zs_pool *pool);
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 63128ddb7959..defc892555e4 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -53,6 +53,10 @@
 
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
 
+#define ZS_DEFERRED_FREE_MAX_BYTES	(128 << 20)
+#define ZS_DEFERRED_FREE_CAPACITY	(ZS_DEFERRED_FREE_MAX_BYTES >> PAGE_SHIFT)
+#define ZS_DEFERRED_FREE_THRESHOLD	(ZS_DEFERRED_FREE_CAPACITY / 2)
+
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
  * a single (unsigned long) handle value.
@@ -217,6 +221,13 @@ struct zs_pool {
 	/* protect zspage migration/compaction */
 	rwlock_t lock;
 	atomic_t compaction_in_progress;
+
+	/* deferred free support */
+	spinlock_t deferred_lock;
+	unsigned long *deferred_handles;
+	unsigned int deferred_count;
+	unsigned int deferred_capacity;
+	struct work_struct deferred_free_work;
 };
 
 static inline void zpdesc_set_first(struct zpdesc *zpdesc)
@@ -579,6 +590,19 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 }
 DEFINE_SHOW_ATTRIBUTE(zs_stats_size);
 
+static int zs_stats_deferred_show(struct seq_file *s, void *v)
+{
+	struct zs_pool *pool = s->private;
+
+	spin_lock(&pool->deferred_lock);
+	seq_printf(s, "pending: %u\n", pool->deferred_count);
+	seq_printf(s, "capacity: %u\n", pool->deferred_capacity);
+	spin_unlock(&pool->deferred_lock);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(zs_stats_deferred);
+
 static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
 {
 	if (!zs_stat_root) {
@@ -590,6 +614,9 @@ static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
 
 	debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
 			    &zs_stats_size_fops);
+	debugfs_create_file("deferred_free", S_IFREG | 0444,
+			    pool->stat_dentry, pool,
+			    &zs_stats_deferred_fops);
 }
 
 static void zs_pool_stat_destroy(struct zs_pool *pool)
@@ -801,13 +828,10 @@ static int trylock_zspage(struct zspage *zspage)
 	return 0;
 }
 
-static void __free_zspage(struct zs_pool *pool, struct size_class *class,
-			  struct zspage *zspage)
+static inline void __free_zspage_lockless(struct zs_pool *pool, struct zspage *zspage)
 {
 	struct zpdesc *zpdesc, *next;
 
-	assert_spin_locked(&class->lock);
-
 	VM_BUG_ON(get_zspage_inuse(zspage));
 	VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
 
@@ -823,7 +847,13 @@
 	} while (zpdesc != NULL);
 
 	cache_free_zspage(zspage);
+}
 
+static void __free_zspage(struct zs_pool *pool, struct size_class *class,
+			  struct zspage *zspage)
+{
+	assert_spin_locked(&class->lock);
+	__free_zspage_lockless(pool, zspage);
 	class_stat_sub(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
 	atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated);
 }
@@ -1388,6 +1418,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 	unsigned long obj;
 	struct size_class *class;
 	int fullness;
+	struct zspage *zspage_to_free = NULL;
 
 	if (IS_ERR_OR_NULL((void *)handle))
 		return;
@@ -1408,14 +1439,96 @@
 	obj_free(class->size, obj);
 
 	fullness = fix_fullness_group(class, zspage);
-	if (fullness == ZS_INUSE_RATIO_0)
-		free_zspage(pool, class, zspage);
+	if (fullness == ZS_INUSE_RATIO_0) {
+		if (trylock_zspage(zspage)) {
+			remove_zspage(class, zspage);
+			class_stat_sub(class, ZS_OBJS_ALLOCATED,
+				       class->objs_per_zspage);
+			zspage_to_free = zspage;
+		} else
+			kick_deferred_free(pool);
+	}
 
 	spin_unlock(&class->lock);
+
+	if (likely(zspage_to_free)) {
+		__free_zspage_lockless(pool, zspage_to_free);
+		atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated);
+	}
 	cache_free_handle(handle);
 }
 EXPORT_SYMBOL_GPL(zs_free);
 
+static void zs_deferred_free_work(struct work_struct *work)
+{
+	struct zs_pool *pool = container_of(work, struct zs_pool,
+					    deferred_free_work);
+	unsigned long handle;
+
+	while (1) {
+		spin_lock(&pool->deferred_lock);
+		if (pool->deferred_count == 0) {
+			spin_unlock(&pool->deferred_lock);
+			break;
+		}
+		handle = pool->deferred_handles[--pool->deferred_count];
+		spin_unlock(&pool->deferred_lock);
+
+		zs_free(pool, handle);
+		cond_resched();
+	}
+}
+
+/**
+ * zs_free_deferred - queue a handle for asynchronous freeing
+ * @pool: pool to free from
+ * @handle: handle to free
+ *
+ * Place @handle into a deferred free queue for later processing by a
+ * workqueue. This is intended for callers that are in atomic context
+ * (e.g. under a spinlock) and cannot afford the cost of zs_free()
+ * directly. When the queue reaches a threshold the work is scheduled.
+ * Falls back to synchronous zs_free() if the lock is contended (drain
+ * in progress) or if the queue is full.
+ */
+void zs_free_deferred(struct zs_pool *pool, unsigned long handle)
+{
+	if (IS_ERR_OR_NULL((void *)handle))
+		return;
+
+	if (!spin_trylock(&pool->deferred_lock))
+		goto sync_free;
+
+	if (pool->deferred_count >= pool->deferred_capacity) {
+		spin_unlock(&pool->deferred_lock);
+		goto sync_free;
+	}
+
+	pool->deferred_handles[pool->deferred_count++] = handle;
+	if (pool->deferred_count >= ZS_DEFERRED_FREE_THRESHOLD)
+		queue_work(system_wq, &pool->deferred_free_work);
+	spin_unlock(&pool->deferred_lock);
+	return;
+
+sync_free:
+	zs_free(pool, handle);
+}
+EXPORT_SYMBOL_GPL(zs_free_deferred);
+
+/**
+ * zs_free_deferred_flush - flush all pending deferred frees
+ * @pool: pool to flush
+ *
+ * Wait for any scheduled work to complete, then drain any remaining
+ * handles. Must be called from process context.
+ */
+void zs_free_deferred_flush(struct zs_pool *pool)
+{
+	flush_work(&pool->deferred_free_work);
+	zs_deferred_free_work(&pool->deferred_free_work);
+}
+EXPORT_SYMBOL_GPL(zs_free_deferred_flush);
+
 static void zs_object_copy(struct size_class *class, unsigned long dst,
 			   unsigned long src)
 {
@@ -2083,6 +2196,18 @@ struct zs_pool *zs_create_pool(const char *name)
 	rwlock_init(&pool->lock);
 	atomic_set(&pool->compaction_in_progress, 0);
 
+	spin_lock_init(&pool->deferred_lock);
+	pool->deferred_capacity = ZS_DEFERRED_FREE_CAPACITY;
+	pool->deferred_handles = kvmalloc_array(pool->deferred_capacity,
+						sizeof(unsigned long),
+						GFP_KERNEL);
+	if (!pool->deferred_handles) {
+		kfree(pool);
+		return NULL;
+	}
+	pool->deferred_count = 0;
+	INIT_WORK(&pool->deferred_free_work, zs_deferred_free_work);
+
 	pool->name = kstrdup(name, GFP_KERNEL);
 	if (!pool->name)
 		goto err;
@@ -2185,6 +2310,7 @@ void zs_destroy_pool(struct zs_pool *pool)
 	int i;
 
 	zs_unregister_shrinker(pool);
+	zs_free_deferred_flush(pool);
 	zs_flush_migration(pool);
 	zs_pool_stat_destroy(pool);
 
@@ -2208,6 +2334,7 @@
 		kfree(class);
 	}
 
+	kvfree(pool->deferred_handles);
 	kfree(pool->name);
 	kfree(pool);
 }
diff --git a/mm/zswap.c b/mm/zswap.c
index 4b5149173b0e..d858662a93af 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -761,11 +761,16 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
 /*
  * Carries out the common pattern of freeing an entry's zsmalloc allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
+ * When @deferred is true, the zsmalloc handle is queued for async freeing
+ * instead of being freed immediately.
  */
-static void zswap_entry_free(struct zswap_entry *entry)
+static void __zswap_entry_free(struct zswap_entry *entry, bool deferred)
 {
 	zswap_lru_del(&zswap_list_lru, entry);
-	zs_free(entry->pool->zs_pool, entry->handle);
+	if (deferred)
+		zs_free_deferred(entry->pool->zs_pool, entry->handle);
+	else
+		zs_free(entry->pool->zs_pool, entry->handle);
 	zswap_pool_put(entry->pool);
 	if (entry->objcg) {
 		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -777,6 +782,11 @@
 	atomic_long_dec(&zswap_stored_pages);
 }
 
+static void zswap_entry_free(struct zswap_entry *entry)
+{
+	__zswap_entry_free(entry, false);
+}
+
 /*********************************
 * compressed storage functions
 **********************************/
@@ -1648,7 +1658,7 @@ void zswap_invalidate(swp_entry_t swp)
 
 	entry = xa_erase(tree, offset);
 	if (entry)
-		zswap_entry_free(entry);
+		__zswap_entry_free(entry, true);
 }
 
 int zswap_swapon(int type, unsigned long nr_pages)
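
The queue/drain/fallback logic added in zs_free_deferred() and zs_deferred_free_work() above can be read in isolation from the pool internals. Below is a minimal user-space sketch of that pattern, assuming a pthread mutex in place of pool->deferred_lock and an explicit drain call in place of the workqueue; the names pending_pool, pending_free and pending_flush are illustrative only and do not exist in the kernel.

/*
 * User-space sketch of the deferred-free pattern: bounded pending array,
 * trylock on the producer side, synchronous fallback on contention or
 * overflow, and a drain loop that holds the lock only to pop one entry.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define PENDING_CAPACITY	1024
#define PENDING_THRESHOLD	(PENDING_CAPACITY / 2)

struct pending_pool {
	pthread_mutex_t lock;			/* stands in for pool->deferred_lock */
	unsigned long handles[PENDING_CAPACITY];
	unsigned int count;
	int drain_requested;			/* stands in for queue_work() */
};

/* The slow, sleep-capable free path (zs_free() in the patch). */
static void sync_free(unsigned long handle)
{
	free((void *)handle);
}

/* Queue one handle; fall back to a synchronous free on contention or overflow. */
static void pending_free(struct pending_pool *pp, unsigned long handle)
{
	if (!handle)
		return;

	if (pthread_mutex_trylock(&pp->lock) != 0) {
		sync_free(handle);		/* drain in progress */
		return;
	}
	if (pp->count >= PENDING_CAPACITY) {
		pthread_mutex_unlock(&pp->lock);
		sync_free(handle);		/* queue full */
		return;
	}
	pp->handles[pp->count++] = handle;
	if (pp->count >= PENDING_THRESHOLD)
		pp->drain_requested = 1;	/* kernel: queue_work() */
	pthread_mutex_unlock(&pp->lock);
}

/* Drain loop: pop one handle per lock hold so producers are never blocked long. */
static void pending_flush(struct pending_pool *pp)
{
	for (;;) {
		unsigned long handle;

		pthread_mutex_lock(&pp->lock);
		if (pp->count == 0) {
			pp->drain_requested = 0;
			pthread_mutex_unlock(&pp->lock);
			return;
		}
		handle = pp->handles[--pp->count];
		pthread_mutex_unlock(&pp->lock);

		sync_free(handle);
	}
}

int main(void)
{
	struct pending_pool pp = { .lock = PTHREAD_MUTEX_INITIALIZER };
	int i;

	for (i = 0; i < 8; i++)
		pending_free(&pp, (unsigned long)malloc(64));

	printf("pending before flush: %u\n", pp.count);
	pending_flush(&pp);
	printf("pending after flush: %u\n", pp.count);
	return 0;
}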