8 changes: 8 additions & 0 deletions block/Kconfig.iosched
@@ -44,4 +44,12 @@ config BFQ_CGROUP_DEBUG
Enable some debugging help. Currently it exports additional stat
files in a cgroup which can be useful for debugging.

config IOSCHED_UFQ
tristate "UFQ I/O scheduler"
default y
help
The UFQ I/O scheduler is programmable: when it is enabled, an
out-of-kernel scheduling policy written as an eBPF program can
attach to UFQ's hooks and redefine how I/O requests are
scheduled.
endmenu
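For orientation, here is a minimal, hypothetical sketch of what a BPF scheduler attaching to UFQ could look like, assuming the struct ufq_iosched_ops layout that appears later in this diff (ufq-bpfops.c). The section names follow the usual libbpf struct_ops conventions; every identifier other than ufq_iosched_ops and its members is invented for illustration.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical BPF-side skeleton, not part of this patch. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("struct_ops/init_sched")
int BPF_PROG(noop_init_sched, struct request_queue *q)
{
	return 0;			/* no per-queue state in this sketch */
}

SEC("struct_ops/insert_req")
int BPF_PROG(noop_insert_req, struct request_queue *q, struct request *rq,
	     blk_insert_t flags)
{
	return 0;			/* a real policy would queue rq here */
}

SEC(".struct_ops.link")
struct ufq_iosched_ops noop_ufq_ops = {
	.init_sched = (void *)noop_init_sched,
	.insert_req = (void *)noop_insert_req,
	.name	    = "noop_ufq",
};

Loading it would follow the standard struct_ops flow (open and load the skeleton, then bpf_map__attach_struct_ops() on noop_ufq_ops), which is what ends up invoking bpf_ufq_reg() further down in this patch.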
1 change: 1 addition & 0 deletions block/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
obj-$(CONFIG_IOSCHED_UFQ) += ufq-iosched.o ufq-bpfops.o ufq-kfunc.o

obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o \
bio-integrity-auto.o bio-integrity-fs.o
49 changes: 43 additions & 6 deletions block/blk-merge.c
@@ -774,8 +774,8 @@ u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
* For non-mq, this has to be called with the request spinlock acquired.
* For mq with scheduling, the appropriate queue wide lock should be held.
*/
static struct request *attempt_merge(struct request_queue *q,
struct request *req, struct request *next)
static struct request *attempt_merge(struct request_queue *q, struct request *req,
struct request *next, bool nohash)
{
if (!rq_mergeable(req) || !rq_mergeable(next))
return NULL;
@@ -842,7 +842,7 @@ static struct request *attempt_merge(struct request_queue *q,

req->__data_len += blk_rq_bytes(next);

if (!blk_discard_mergable(req))
if (!nohash && !blk_discard_mergable(req))
elv_merge_requests(q, req, next);

blk_crypto_rq_put_keyslot(next);
@@ -868,7 +868,7 @@ static struct request *attempt_back_merge(struct request_queue *q,
struct request *next = elv_latter_request(q, rq);

if (next)
return attempt_merge(q, rq, next);
return attempt_merge(q, rq, next, false);

return NULL;
}
@@ -879,11 +879,17 @@ static struct request *attempt_front_merge(struct request_queue *q,
struct request *prev = elv_former_request(q, rq);

if (prev)
return attempt_merge(q, prev, rq);
return attempt_merge(q, prev, rq, false);

return NULL;
}

struct request *bpf_attempt_merge(struct request_queue *q, struct request *rq,
struct request *next)
{
return attempt_merge(q, rq, next, true);
}

/*
* Try to merge 'next' into 'rq'. Return true if the merge happened, false
* otherwise. The caller is responsible for freeing 'next' if the merge
@@ -892,7 +898,7 @@ static struct request *attempt_front_merge(struct request_queue *q,
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next)
{
return attempt_merge(q, rq, next);
return attempt_merge(q, rq, next, false);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
@@ -1169,3 +1175,34 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

bool blk_mq_sched_merge_fn(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **merged_request,
struct request *rq, enum elv_merge type, void (*fn)
(struct request_queue *, struct request *, enum elv_merge))
{
switch (type) {
case ELEVATOR_BACK_MERGE:
if (!blk_mq_sched_allow_merge(q, rq, bio))
return false;
if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
return false;
*merged_request = attempt_back_merge(q, rq);
if (!*merged_request)
fn(q, rq, ELEVATOR_BACK_MERGE);
return true;
case ELEVATOR_FRONT_MERGE:
if (!blk_mq_sched_allow_merge(q, rq, bio))
return false;
if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
return false;
*merged_request = attempt_front_merge(q, rq);
if (!*merged_request)
fn(q, rq, ELEVATOR_FRONT_MERGE);
return true;
case ELEVATOR_DISCARD_MERGE:
return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
default:
return false;
}
}
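A hypothetical caller, sketched only to show the intended use: blk_mq_sched_merge_fn() behaves like blk_mq_sched_try_merge() except that, when the bio is merged into rq but rq is not collapsed into a neighbouring request, the supplied callback is invoked in place of elv_merged_request(). The real user is presumably ufq-iosched.c, which is not part of this hunk; the function names below are invented, while ufq_ops is the global defined in ufq-bpfops.c.

static void ufq_req_merged_cb(struct request_queue *q, struct request *rq,
			      enum elv_merge type)
{
	/* forward the notification to the attached BPF scheduler, if any */
	if (ufq_ops.req_merged)
		ufq_ops.req_merged(q, rq, type);
}

static bool ufq_bio_merge_one(struct request_queue *q, struct request *rq,
			      struct bio *bio, unsigned int nr_segs,
			      enum elv_merge type, struct request **free)
{
	return blk_mq_sched_merge_fn(q, bio, nr_segs, free, rq, type,
				     ufq_req_merged_cb);
}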
4 changes: 4 additions & 0 deletions block/blk-mq-sched.h
@@ -7,6 +7,10 @@

#define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)

bool blk_mq_sched_merge_fn(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **merged_request,
struct request *rq, enum elv_merge type, void (*fn)
(struct request_queue *, struct request *, enum elv_merge));
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **merged_request);
bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
8 changes: 7 additions & 1 deletion block/blk-mq.c
@@ -796,7 +796,7 @@ static void blk_mq_finish_request(struct request *rq)
}
}

static void __blk_mq_free_request(struct request *rq)
void __blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct blk_mq_ctx *ctx = rq->mq_ctx;
@@ -1844,6 +1844,12 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
if (list_empty(&ctx->rq_lists[type]))
sbitmap_clear_bit(sb, bitnr);
}

if (dispatch_data->rq) {
dispatch_data->rq->rq_flags |= RQF_STARTED;
if (hctx->queue->last_merge == dispatch_data->rq)
hctx->queue->last_merge = NULL;
}
spin_unlock(&ctx->lock);

return !dispatch_data->rq;
2 changes: 1 addition & 1 deletion block/blk-mq.h
@@ -56,7 +56,7 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
void blk_mq_put_rq_ref(struct request *rq);

void __blk_mq_free_request(struct request *rq);
/*
* Internal helpers for allocating/freeing the request map
*/
2 changes: 2 additions & 0 deletions block/blk.h
@@ -444,6 +444,8 @@ static inline unsigned get_max_segment_size(const struct queue_limits *lim,

int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
struct request *bpf_attempt_merge(struct request_queue *q, struct request *rq,
struct request *next);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
213 changes: 213 additions & 0 deletions block/ufq-bpfops.c
@@ -0,0 +1,213 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2026 KylinSoft Corporation.
* Copyright (c) 2026 Kaitao Cheng <[email protected]>
*/
#include <linux/init.h>
#include <linux/types.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/string.h>
#include "ufq-iosched.h"

struct ufq_iosched_ops ufq_ops;

static const struct bpf_func_proto *
bpf_ufq_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return bpf_base_func_proto(func_id, prog);
}

static bool bpf_ufq_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type != BPF_READ)
return false;
if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
return false;
if (off % size != 0)
return false;

/*
* merge_req's third argument is int *type. btf_ctx_access() treats
* pointers that are not "pointer to struct" as scalars (no reg_type),
* so loading the pointer from ctx leaves a SCALAR and *type stores
* fail verification. Model it as a read/write buffer so the program
* can store the merge type through it.
*/
if (off == 16 && size == sizeof(__u64) &&
prog->aux->attach_func_name &&
!strcmp(prog->aux->attach_func_name, "merge_req")) {
if (!btf_ctx_access(off, size, type, prog, info))
return false;
info->reg_type = PTR_TO_BUF;
return true;
}

return btf_ctx_access(off, size, type, prog, info);
}

static const struct bpf_verifier_ops bpf_ufq_verifier_ops = {
.get_func_proto = bpf_ufq_get_func_proto,
.is_valid_access = bpf_ufq_is_valid_access,
};

static int bpf_ufq_init_member(const struct btf_type *t,
const struct btf_member *member,
void *kdata, const void *udata)
{
const struct ufq_iosched_ops *uops = udata;
struct ufq_iosched_ops *ops = kdata;
u32 moff = __btf_member_bit_offset(t, member) / 8;
int ret;

switch (moff) {
case offsetof(struct ufq_iosched_ops, name):
ret = bpf_obj_name_cpy(ops->name, uops->name,
sizeof(ops->name));
if (ret < 0)
return ret;
if (ret == 0)
return -EINVAL;
return 1;
/* other members can be handled here ... */
}

return 0;
}

static int bpf_ufq_check_member(const struct btf_type *t,
const struct btf_member *member,
const struct bpf_prog *prog)
{
return 0;
}

static int bpf_ufq_enable(struct ufq_iosched_ops *ops)
{
ufq_ops = *ops;
return 0;
}

static void bpf_ufq_disable(struct ufq_iosched_ops *ops)
{
memset(&ufq_ops, 0, sizeof(ufq_ops));
}

static int bpf_ufq_reg(void *kdata, struct bpf_link *link)
{
return bpf_ufq_enable(kdata);
}

static void bpf_ufq_unreg(void *kdata, struct bpf_link *link)
{
bpf_ufq_disable(kdata);
}

static int bpf_ufq_init(struct btf *btf)
{
return 0;
}

static int bpf_ufq_update(void *kdata, void *old_kdata, struct bpf_link *link)
{
/*
* UFQ does not support live-updating an already-attached BPF scheduler:
* partial failure during callback setup (e.g. init_sched) would be hard
* to reason about, and update can race with unregister/teardown.
*/
return -EOPNOTSUPP;
}

static int bpf_ufq_validate(void *kdata)
{
return 0;
}

static int init_sched_stub(struct request_queue *q)
{
return -EPERM;
}

static int exit_sched_stub(struct request_queue *q)
{
return -EPERM;
}

static int insert_req_stub(struct request_queue *q, struct request *rq,
blk_insert_t flags)
{
return 0;
}

static struct request *dispatch_req_stub(struct request_queue *q)
{
return NULL;
}

static bool has_req_stub(struct request_queue *q, int rqs_count)
{
return rqs_count > 0;
}

static void finish_req_stub(struct request *rq)
{
}

static struct request *former_req_stub(struct request_queue *q, struct request *rq)
{
return NULL;
}

static struct request *next_req_stub(struct request_queue *q, struct request *rq)
{
return NULL;
}

static struct request *merge_req_stub(struct request_queue *q, struct request *rq,
int *type)
{
*type = ELEVATOR_NO_MERGE;
return NULL;
}

static void req_merged_stub(struct request_queue *q, struct request *rq,
int type)
{
}

static struct ufq_iosched_ops __bpf_ops_ufq_ops = {
.init_sched = init_sched_stub,
.exit_sched = exit_sched_stub,
.insert_req = insert_req_stub,
.dispatch_req = dispatch_req_stub,
.has_req = has_req_stub,
.former_req = former_req_stub,
.next_req = next_req_stub,
.merge_req = merge_req_stub,
.req_merged = req_merged_stub,
.finish_req = finish_req_stub,
};

static struct bpf_struct_ops bpf_iosched_ufq_ops = {
.verifier_ops = &bpf_ufq_verifier_ops,
.reg = bpf_ufq_reg,
.unreg = bpf_ufq_unreg,
.check_member = bpf_ufq_check_member,
.init_member = bpf_ufq_init_member,
.init = bpf_ufq_init,
.update = bpf_ufq_update,
.validate = bpf_ufq_validate,
.name = "ufq_iosched_ops",
.owner = THIS_MODULE,
.cfi_stubs = &__bpf_ops_ufq_ops
};

int bpf_ufq_ops_init(void)
{
return register_bpf_struct_ops(&bpf_iosched_ufq_ops, ufq_iosched_ops);
}
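Tying this back to the comment in bpf_ufq_is_valid_access() above: a BPF-side merge_req implementation is expected to report the chosen merge type by storing through its third argument, which is exactly the access the PTR_TO_BUF special case permits. A minimal, hypothetical sketch follows; the program name is invented, ELEVATOR_NO_MERGE is assumed to be visible via vmlinux.h BTF, and how the verifier treats the struct request * return value is governed by other parts of this series, not shown in this hunk.

SEC("struct_ops/merge_req")
struct request *BPF_PROG(noop_merge_req, struct request_queue *q,
			 struct request *rq, int *type)
{
	/* the store through 'type' is what the PTR_TO_BUF case makes verifiable */
	*type = ELEVATOR_NO_MERGE;
	return NULL;			/* no candidate request to merge with */
}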
