Skip to content

Commit a46ea21

Browse files
committed
feat(amzn): Add interval polling for NVMe latency stats
Add an --interval/-i option to the get-stats command that enables continuous polling of NVMe latency statistics at a user-specified interval, given in seconds. The first iteration prints the absolute snapshot; each subsequent iteration computes and displays the delta from the previous snapshot, making it easy to observe real-time workload behavior.

Signed-off-by: Zhen Wang <[email protected]>
1 parent 32f4203 commit a46ea21

1 file changed

Lines changed: 160 additions & 25 deletions

File tree

plugins/amzn/amzn-nvme.c

Lines changed: 160 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0-or-later
22
#include <fcntl.h>
33
#include <errno.h>
4+
#include <signal.h>
45
#include <stdio.h>
56
#include <stdlib.h>
67
#include <unistd.h>
@@ -73,8 +74,8 @@ struct amzn_latency_log_page_base {
7374
__u64 total_write_time;
7475
__u64 ebs_volume_performance_exceeded_iops;
7576
__u64 ebs_volume_performance_exceeded_tp;
76-
__u64 ec2_instance_ebs_performance_exceeded_iops;
77-
__u64 ec2_instance_ebs_performance_exceeded_tp;
77+
__u64 ec2_instance_performance_exceeded_iops;
78+
__u64 ec2_instance_performance_exceeded_tp;
7879
__u64 volume_queue_length;
7980
__u8 reserved1[416];
8081

@@ -189,25 +190,21 @@ static void amzn_print_io_stats(struct amzn_latency_log_page *log_page)
189190
printf(" Read: %"PRIu64"\n", (uint64_t)base->total_read_time);
190191
printf(" Write: %"PRIu64"\n\n", (uint64_t)base->total_write_time);
191192

192-
if (is_local_storage(log_page)) {
193-
printf("EC2 Instance Local Storage Performance Exceeded (us):\n");
194-
printf(" IOPS: %"PRIu64"\n",
195-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
196-
printf(" Throughput: %"PRIu64"\n\n",
197-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
198-
} else {
193+
if (!is_local_storage(log_page)) {
199194
printf("EBS Volume Performance Exceeded (us):\n");
200195
printf(" IOPS: %"PRIu64"\n", (uint64_t)base->ebs_volume_performance_exceeded_iops);
201196
printf(" Throughput: %"PRIu64"\n\n",
202197
(uint64_t)base->ebs_volume_performance_exceeded_tp);
203-
printf("EC2 Instance EBS Performance Exceeded (us):\n");
204-
printf(" IOPS: %"PRIu64"\n",
205-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
206-
printf(" Throughput: %"PRIu64"\n\n",
207-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
208-
209198
}
210199

200+
printf("%s Performance Exceeded (us):\n",
201+
is_local_storage(log_page) ?
202+
"EC2 Instance Local Storage" : "EC2 Instance EBS");
203+
printf(" IOPS: %"PRIu64"\n",
204+
(uint64_t)base->ec2_instance_performance_exceeded_iops);
205+
printf(" Throughput: %"PRIu64"\n\n",
206+
(uint64_t)base->ec2_instance_performance_exceeded_tp);
207+
211208
printf("Queue Length (point in time): %"PRIu64"\n\n",
212209
(uint64_t)base->volume_queue_length);
213210
}
@@ -321,10 +318,10 @@ static void amzn_json_add_io_stats(struct json_object *root,
321318
obj_add_uint64(root, "ebs_volume_performance_exceeded_tp",
322319
base->ebs_volume_performance_exceeded_tp);
323320
obj_add_uint64(root,
324-
"ec2_instance_ebs_performance_exceeded_iops",
325-
base->ec2_instance_ebs_performance_exceeded_iops);
326-
obj_add_uint64(root, "ec2_instance_ebs_performance_exceeded_tp",
327-
base->ec2_instance_ebs_performance_exceeded_tp);
321+
"ec2_instance_performance_exceeded_iops",
322+
base->ec2_instance_performance_exceeded_iops);
323+
obj_add_uint64(root, "ec2_instance_performance_exceeded_tp",
324+
base->ec2_instance_performance_exceeded_tp);
328325
obj_add_uint64(root, "volume_queue_length", base->volume_queue_length);
329326

330327
}
@@ -463,17 +460,100 @@ static void amzn_print_json_stats(struct amzn_latency_log_page *log, bool detail
463460
#define amzn_print_json_stats(log, detail)
464461
#endif /* CONFIG_JSONC */
465462

463+
/*
 * Run flag for the get-stats polling loop.
 *
 * get_stats() sets it to 1 before entering the loop and keeps polling while
 * it is non-zero; amzn_sigint_handler() clears it. Because it is written
 * from a signal handler and read from normal code, it must be
 * 'volatile sig_atomic_t': without 'volatile' the compiler may keep the
 * value cached across loop iterations and never observe the handler's store.
 */
static volatile sig_atomic_t amzn_keep_polling = 1;

/*
 * Signal handler that requests the polling loop to stop.
 *
 * @sig: delivered signal number (unused).
 *
 * Only performs a store to a volatile sig_atomic_t object, which is the
 * one kind of access to static storage a handler is guaranteed to be
 * allowed to make.
 */
static void amzn_sigint_handler(int sig)
{
	(void)sig;
	amzn_keep_polling = 0;
}
470+
471+
static void amzn_compute_histogram_diff(struct amzn_latency_histogram *diff,
472+
struct amzn_latency_histogram *curr,
473+
struct amzn_latency_histogram *prev)
474+
{
475+
diff->num_bins = curr->num_bins;
476+
for (int b = 0; b < curr->num_bins && b < 64; b++) {
477+
diff->bins[b].lower = curr->bins[b].lower;
478+
diff->bins[b].upper = curr->bins[b].upper;
479+
diff->bins[b].count = curr->bins[b].count - prev->bins[b].count;
480+
}
481+
}
482+
483+
static void amzn_compute_stats_diff(struct amzn_latency_log_page *diff,
484+
struct amzn_latency_log_page *curr,
485+
struct amzn_latency_log_page *prev)
486+
{
487+
struct amzn_latency_log_page_base *d = &diff->base;
488+
struct amzn_latency_log_page_base *c = &curr->base;
489+
struct amzn_latency_log_page_base *p = &prev->base;
490+
491+
d->magic = c->magic;
492+
d->version = c->version;
493+
d->total_read_ops = c->total_read_ops - p->total_read_ops;
494+
d->total_write_ops = c->total_write_ops - p->total_write_ops;
495+
d->total_read_bytes = c->total_read_bytes - p->total_read_bytes;
496+
d->total_write_bytes = c->total_write_bytes - p->total_write_bytes;
497+
d->total_read_time = c->total_read_time - p->total_read_time;
498+
d->total_write_time = c->total_write_time - p->total_write_time;
499+
d->ebs_volume_performance_exceeded_iops =
500+
c->ebs_volume_performance_exceeded_iops -
501+
p->ebs_volume_performance_exceeded_iops;
502+
d->ebs_volume_performance_exceeded_tp =
503+
c->ebs_volume_performance_exceeded_tp -
504+
p->ebs_volume_performance_exceeded_tp;
505+
d->ec2_instance_performance_exceeded_iops =
506+
c->ec2_instance_performance_exceeded_iops -
507+
p->ec2_instance_performance_exceeded_iops;
508+
d->ec2_instance_performance_exceeded_tp =
509+
c->ec2_instance_performance_exceeded_tp -
510+
p->ec2_instance_performance_exceeded_tp;
511+
512+
/* queue length is point-in-time, not cumulative */
513+
d->volume_queue_length = c->volume_queue_length;
514+
515+
amzn_compute_histogram_diff(&d->read_io_latency_histogram,
516+
&c->read_io_latency_histogram,
517+
&p->read_io_latency_histogram);
518+
amzn_compute_histogram_diff(&d->write_io_latency_histogram,
519+
&c->write_io_latency_histogram,
520+
&p->write_io_latency_histogram);
521+
522+
/* copy detail IO metadata from current */
523+
d->num_of_hists = c->num_of_hists;
524+
memcpy(d->hist_io_sizes, c->hist_io_sizes, sizeof(d->hist_io_sizes));
525+
526+
/* diff detail IO histogram counts */
527+
for (int i = 0; i < AMZN_NVME_STATS_NUM_HISTOGRAM; i++) {
528+
struct amzn_latency_io_histogram *dh, *ch, *ph;
529+
530+
dh = &diff->detail_io.io_hist_array[i];
531+
ch = &curr->detail_io.io_hist_array[i];
532+
ph = &prev->detail_io.io_hist_array[i];
533+
534+
for (int b = 0; b < AMZN_NVME_STATS_NUM_HISTOGRAM_BINS; b++) {
535+
dh->read_io_histogram_counts.counts[b] =
536+
ch->read_io_histogram_counts.counts[b] -
537+
ph->read_io_histogram_counts.counts[b];
538+
dh->write_io_histogram_counts.counts[b] =
539+
ch->write_io_histogram_counts.counts[b] -
540+
ph->write_io_histogram_counts.counts[b];
541+
}
542+
}
543+
}
544+
466545
static int get_stats(int argc, char **argv, struct command *acmd,
467546
struct plugin *plugin)
468547
{
469548
const char *desc = "display command latency statistics";
470549
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
471550
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
472551
struct amzn_latency_log_page log = { 0 };
473-
nvme_print_flags_t flags = 0; // Initialize flags to 0
552+
nvme_print_flags_t flags = 0;
474553
struct nvme_passthru_cmd cmd;
475554
struct nvme_id_ctrl ctrl;
476555
bool detail = false;
556+
unsigned int interval = 0;
477557
size_t len;
478558
__u32 nsid = 1;
479559
int rc;
@@ -487,7 +567,10 @@ static int get_stats(int argc, char **argv, struct command *acmd,
487567
};
488568

489569
NVME_ARGS(opts,
490-
OPT_FLAG("details", 'd', &detail, "Detail IO histogram of each block size ranges"));
570+
OPT_FLAG("details", 'd', &detail,
571+
"Detail IO histogram of each block size ranges"),
572+
OPT_UINT("interval", 'i', &interval,
573+
"Polling interval in seconds"));
491574

492575
rc = parse_and_open(&ctx, &hdl, argc, argv, desc, opts);
493576
if (rc)
@@ -538,10 +621,62 @@ static int get_stats(int argc, char **argv, struct command *acmd,
538621
goto done;
539622
}
540623

541-
if (flags & JSON)
542-
amzn_print_json_stats(&log, detail);
543-
else
544-
amzn_print_normal_stats(&log, detail);
624+
if (interval > 0) {
625+
struct amzn_latency_log_page prev, curr, diff;
626+
struct sigaction sa = { .sa_handler = amzn_sigint_handler };
627+
628+
sigemptyset(&sa.sa_mask);
629+
sigaction(SIGINT, &sa, NULL);
630+
631+
printf("Polling NVMe stats every %u sec(s);"
632+
" press Ctrl+C to stop\n\n",
633+
interval);
634+
635+
/* print the initial absolute snapshot */
636+
if (flags & JSON)
637+
amzn_print_json_stats(&log, detail);
638+
else
639+
amzn_print_normal_stats(&log, detail);
640+
641+
prev = log;
642+
amzn_keep_polling = 1;
643+
644+
while (amzn_keep_polling) {
645+
sleep(interval);
646+
if (!amzn_keep_polling)
647+
break;
648+
649+
memset(&curr, 0, sizeof(curr));
650+
nvme_init_get_log(&cmd, nsid,
651+
AMZN_NVME_STATS_LOGPAGE_ID,
652+
NVME_CSI_NVM, &curr, len);
653+
rc = nvme_get_log(hdl, &cmd, false,
654+
NVME_LOG_PAGE_PDU_SIZE);
655+
if (rc != 0) {
656+
fprintf(stderr,
657+
"[ERROR] %s: Failed to get"
658+
" log page, rc = %d\n",
659+
__func__, rc);
660+
goto done;
661+
}
662+
663+
memset(&diff, 0, sizeof(diff));
664+
amzn_compute_stats_diff(&diff, &curr, &prev);
665+
666+
if (flags & JSON)
667+
amzn_print_json_stats(&diff, detail);
668+
else
669+
amzn_print_normal_stats(&diff, detail);
670+
671+
prev = curr;
672+
printf("\n");
673+
}
674+
} else {
675+
if (flags & JSON)
676+
amzn_print_json_stats(&log, detail);
677+
else
678+
amzn_print_normal_stats(&log, detail);
679+
}
545680

546681
done:
547682
return rc;

0 commit comments

Comments
 (0)