Skip to content

Commit 5e91bec

Browse files
awz1234 authored and igaw committed
amzn: Add interval polling for NVMe latency stats
Add --interval/-i option to the amzn stats command that enables continuous polling of NVMe latency statistics at a user-specified interval. Each iteration computes and displays the delta from the previous snapshot, making it easy to observe real-time workload behavior. Signed-off-by: Zhen Wang <[email protected]>
1 parent e89daf6 commit 5e91bec

1 file changed

Lines changed: 162 additions & 25 deletions

File tree

plugins/amzn/amzn-nvme.c

Lines changed: 162 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0-or-later
22
#include <fcntl.h>
33
#include <errno.h>
4+
#include <signal.h>
45
#include <stdio.h>
56
#include <stdlib.h>
67
#include <unistd.h>
@@ -73,8 +74,8 @@ struct amzn_latency_log_page_base {
7374
__u64 total_write_time;
7475
__u64 ebs_volume_performance_exceeded_iops;
7576
__u64 ebs_volume_performance_exceeded_tp;
76-
__u64 ec2_instance_ebs_performance_exceeded_iops;
77-
__u64 ec2_instance_ebs_performance_exceeded_tp;
77+
__u64 ec2_instance_performance_exceeded_iops;
78+
__u64 ec2_instance_performance_exceeded_tp;
7879
__u64 volume_queue_length;
7980
__u8 reserved1[416];
8081

@@ -189,25 +190,21 @@ static void amzn_print_io_stats(struct amzn_latency_log_page *log_page)
189190
printf(" Read: %"PRIu64"\n", (uint64_t)base->total_read_time);
190191
printf(" Write: %"PRIu64"\n\n", (uint64_t)base->total_write_time);
191192

192-
if (is_local_storage(log_page)) {
193-
printf("EC2 Instance Local Storage Performance Exceeded (us):\n");
194-
printf(" IOPS: %"PRIu64"\n",
195-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
196-
printf(" Throughput: %"PRIu64"\n\n",
197-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
198-
} else {
193+
if (!is_local_storage(log_page)) {
199194
printf("EBS Volume Performance Exceeded (us):\n");
200195
printf(" IOPS: %"PRIu64"\n", (uint64_t)base->ebs_volume_performance_exceeded_iops);
201196
printf(" Throughput: %"PRIu64"\n\n",
202197
(uint64_t)base->ebs_volume_performance_exceeded_tp);
203-
printf("EC2 Instance EBS Performance Exceeded (us):\n");
204-
printf(" IOPS: %"PRIu64"\n",
205-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
206-
printf(" Throughput: %"PRIu64"\n\n",
207-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
208-
209198
}
210199

200+
printf("%s Performance Exceeded (us):\n",
201+
is_local_storage(log_page) ?
202+
"EC2 Instance Local Storage" : "EC2 Instance EBS");
203+
printf(" IOPS: %"PRIu64"\n",
204+
(uint64_t)base->ec2_instance_performance_exceeded_iops);
205+
printf(" Throughput: %"PRIu64"\n\n",
206+
(uint64_t)base->ec2_instance_performance_exceeded_tp);
207+
211208
printf("Queue Length (point in time): %"PRIu64"\n\n",
212209
(uint64_t)base->volume_queue_length);
213210
}
@@ -321,10 +318,10 @@ static void amzn_json_add_io_stats(struct json_object *root,
321318
obj_add_uint64(root, "ebs_volume_performance_exceeded_tp",
322319
base->ebs_volume_performance_exceeded_tp);
323320
obj_add_uint64(root,
324-
"ec2_instance_ebs_performance_exceeded_iops",
325-
base->ec2_instance_ebs_performance_exceeded_iops);
326-
obj_add_uint64(root, "ec2_instance_ebs_performance_exceeded_tp",
327-
base->ec2_instance_ebs_performance_exceeded_tp);
321+
"ec2_instance_performance_exceeded_iops",
322+
base->ec2_instance_performance_exceeded_iops);
323+
obj_add_uint64(root, "ec2_instance_performance_exceeded_tp",
324+
base->ec2_instance_performance_exceeded_tp);
328325
obj_add_uint64(root, "volume_queue_length", base->volume_queue_length);
329326

330327
}
@@ -463,17 +460,114 @@ static void amzn_print_json_stats(struct amzn_latency_log_page *log, bool detail
463460
#define amzn_print_json_stats(log, detail)
464461
#endif /* CONFIG_JSONC */
465462

463+
/*
 * Print one latency log page in the output format selected by @flags.
 *
 * @log:    log page to display
 * @detail: passed through unchanged to the selected printer
 * @flags:  when the JSON bit is set the JSON printer is used, otherwise the
 *          normal (plain text) printer is used
 *
 * NOTE(review): amzn_print_json_stats appears to be defined as an empty macro
 * when CONFIG_JSONC is not set, in which case the JSON path prints nothing -
 * confirm against the surrounding #if block.
 */
static void amzn_print_stats(struct amzn_latency_log_page *log,
			     bool detail, nvme_print_flags_t flags)
{
	if (!(flags & JSON)) {
		amzn_print_normal_stats(log, detail);
		return;
	}

	amzn_print_json_stats(log, detail);
}
471+
472+
/*
 * Run flag for the interval polling loop: non-zero means keep polling.
 *
 * It is cleared asynchronously from the SIGINT handler below, so it must be
 * a volatile sig_atomic_t.  Without volatile the compiler is allowed to cache
 * the value across loop iterations and the loop might never observe the
 * handler's store.
 */
static volatile sig_atomic_t amzn_keep_polling = 1;

/*
 * Signal handler that requests the polling loop to stop.
 *
 * @sig: signal number (unused)
 *
 * Only stores to the volatile sig_atomic_t flag, which is the one kind of
 * object access that is safe from within a signal handler.
 */
static void amzn_sigint_handler(int sig)
{
	(void)sig;
	amzn_keep_polling = 0;
}
479+
480+
static void amzn_compute_histogram_diff(struct amzn_latency_histogram *diff,
481+
struct amzn_latency_histogram *curr,
482+
struct amzn_latency_histogram *prev)
483+
{
484+
diff->num_bins = curr->num_bins;
485+
for (int b = 0; b < curr->num_bins && b < 64; b++) {
486+
diff->bins[b].lower = curr->bins[b].lower;
487+
diff->bins[b].upper = curr->bins[b].upper;
488+
diff->bins[b].count = curr->bins[b].count - prev->bins[b].count;
489+
}
490+
}
491+
492+
static void amzn_compute_stats_diff(struct amzn_latency_log_page *diff,
493+
struct amzn_latency_log_page *curr,
494+
struct amzn_latency_log_page *prev)
495+
{
496+
struct amzn_latency_log_page_base *d = &diff->base;
497+
struct amzn_latency_log_page_base *c = &curr->base;
498+
struct amzn_latency_log_page_base *p = &prev->base;
499+
500+
d->magic = c->magic;
501+
d->version = c->version;
502+
d->total_read_ops = c->total_read_ops - p->total_read_ops;
503+
d->total_write_ops = c->total_write_ops - p->total_write_ops;
504+
d->total_read_bytes = c->total_read_bytes - p->total_read_bytes;
505+
d->total_write_bytes = c->total_write_bytes - p->total_write_bytes;
506+
d->total_read_time = c->total_read_time - p->total_read_time;
507+
d->total_write_time = c->total_write_time - p->total_write_time;
508+
d->ebs_volume_performance_exceeded_iops =
509+
c->ebs_volume_performance_exceeded_iops -
510+
p->ebs_volume_performance_exceeded_iops;
511+
d->ebs_volume_performance_exceeded_tp =
512+
c->ebs_volume_performance_exceeded_tp -
513+
p->ebs_volume_performance_exceeded_tp;
514+
d->ec2_instance_performance_exceeded_iops =
515+
c->ec2_instance_performance_exceeded_iops -
516+
p->ec2_instance_performance_exceeded_iops;
517+
d->ec2_instance_performance_exceeded_tp =
518+
c->ec2_instance_performance_exceeded_tp -
519+
p->ec2_instance_performance_exceeded_tp;
520+
521+
/* queue length is point-in-time, not cumulative */
522+
d->volume_queue_length = c->volume_queue_length;
523+
524+
amzn_compute_histogram_diff(&d->read_io_latency_histogram,
525+
&c->read_io_latency_histogram,
526+
&p->read_io_latency_histogram);
527+
amzn_compute_histogram_diff(&d->write_io_latency_histogram,
528+
&c->write_io_latency_histogram,
529+
&p->write_io_latency_histogram);
530+
531+
/* copy detail IO metadata from current */
532+
d->num_of_hists = c->num_of_hists;
533+
memcpy(d->hist_io_sizes, c->hist_io_sizes, sizeof(d->hist_io_sizes));
534+
535+
/* diff detail IO histogram counts */
536+
for (int i = 0; i < AMZN_NVME_STATS_NUM_HISTOGRAM; i++) {
537+
for (int b = 0; b < AMZN_NVME_STATS_NUM_HISTOGRAM_BINS; b++) {
538+
__u64 cr, pr, cw, pw;
539+
540+
cr = curr->detail_io.io_hist_array[i]
541+
.read_io_histogram_counts.counts[b];
542+
pr = prev->detail_io.io_hist_array[i]
543+
.read_io_histogram_counts.counts[b];
544+
cw = curr->detail_io.io_hist_array[i]
545+
.write_io_histogram_counts.counts[b];
546+
pw = prev->detail_io.io_hist_array[i]
547+
.write_io_histogram_counts.counts[b];
548+
549+
diff->detail_io.io_hist_array[i]
550+
.read_io_histogram_counts.counts[b] =
551+
cr - pr;
552+
diff->detail_io.io_hist_array[i]
553+
.write_io_histogram_counts.counts[b] =
554+
cw - pw;
555+
}
556+
}
557+
}
558+
466559
static int get_stats(int argc, char **argv, struct command *acmd,
467560
struct plugin *plugin)
468561
{
469562
const char *desc = "display command latency statistics";
470563
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
471564
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
472565
struct amzn_latency_log_page log = { 0 };
473-
nvme_print_flags_t flags = 0; // Initialize flags to 0
566+
nvme_print_flags_t flags = 0;
474567
struct nvme_passthru_cmd cmd;
475568
struct nvme_id_ctrl ctrl;
476569
bool detail = false;
570+
unsigned int interval = 0;
477571
size_t len;
478572
__u32 nsid = 1;
479573
int rc;
@@ -487,7 +581,10 @@ static int get_stats(int argc, char **argv, struct command *acmd,
487581
};
488582

489583
NVME_ARGS(opts,
490-
OPT_FLAG("details", 'd', &detail, "Detail IO histogram of each block size ranges"));
584+
OPT_FLAG("details", 'd', &detail,
585+
"Detail IO histogram of each block size ranges"),
586+
OPT_UINT("interval", 'i', &interval,
587+
"Polling interval in seconds"));
491588

492589
rc = parse_and_open(&ctx, &hdl, argc, argv, desc, opts);
493590
if (rc)
@@ -538,10 +635,50 @@ static int get_stats(int argc, char **argv, struct command *acmd,
538635
goto done;
539636
}
540637

541-
if (flags & JSON)
542-
amzn_print_json_stats(&log, detail);
543-
else
544-
amzn_print_normal_stats(&log, detail);
638+
if (interval > 0) {
639+
struct amzn_latency_log_page prev, curr, diff;
640+
struct sigaction sa = { .sa_handler = amzn_sigint_handler };
641+
642+
sigemptyset(&sa.sa_mask);
643+
sigaction(SIGINT, &sa, NULL);
644+
645+
printf("Polling NVMe stats every %u sec(s);"
646+
" press Ctrl+C to stop\n\n",
647+
interval);
648+
649+
amzn_print_stats(&log, detail, flags);
650+
651+
prev = log;
652+
amzn_keep_polling = 1;
653+
654+
while (amzn_keep_polling) {
655+
sleep(interval);
656+
if (!amzn_keep_polling)
657+
break;
658+
659+
memset(&curr, 0, sizeof(curr));
660+
nvme_init_get_log(&cmd, nsid,
661+
AMZN_NVME_STATS_LOGPAGE_ID,
662+
NVME_CSI_NVM, &curr, len);
663+
rc = nvme_get_log(hdl, &cmd, false,
664+
NVME_LOG_PAGE_PDU_SIZE);
665+
if (rc != 0) {
666+
nvme_show_error("get log page failed, rc=%d",
667+
rc);
668+
goto done;
669+
}
670+
671+
memset(&diff, 0, sizeof(diff));
672+
amzn_compute_stats_diff(&diff, &curr, &prev);
673+
674+
amzn_print_stats(&diff, detail, flags);
675+
676+
prev = curr;
677+
printf("\n");
678+
}
679+
} else {
680+
amzn_print_stats(&log, detail, flags);
681+
}
545682

546683
done:
547684
return rc;

0 commit comments

Comments
 (0)