Skip to content

Commit 18e2489

Browse files
committed
feat(amzn): Add interval polling for NVMe latency stats
Add an --interval/-i option to the get-stats command that enables continuous polling of NVMe latency statistics at a user-specified interval. Each iteration computes and displays the delta from the previous snapshot, making it easy to observe real-time workload behavior.

Signed-off-by: Zhen Wang <[email protected]>
1 parent 32f4203 commit 18e2489

1 file changed

Lines changed: 163 additions & 25 deletions

File tree

plugins/amzn/amzn-nvme.c

Lines changed: 163 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0-or-later
22
#include <fcntl.h>
33
#include <errno.h>
4+
#include <signal.h>
45
#include <stdio.h>
56
#include <stdlib.h>
67
#include <unistd.h>
@@ -73,8 +74,8 @@ struct amzn_latency_log_page_base {
7374
__u64 total_write_time;
7475
__u64 ebs_volume_performance_exceeded_iops;
7576
__u64 ebs_volume_performance_exceeded_tp;
76-
__u64 ec2_instance_ebs_performance_exceeded_iops;
77-
__u64 ec2_instance_ebs_performance_exceeded_tp;
77+
__u64 ec2_instance_performance_exceeded_iops;
78+
__u64 ec2_instance_performance_exceeded_tp;
7879
__u64 volume_queue_length;
7980
__u8 reserved1[416];
8081

@@ -189,25 +190,21 @@ static void amzn_print_io_stats(struct amzn_latency_log_page *log_page)
189190
printf(" Read: %"PRIu64"\n", (uint64_t)base->total_read_time);
190191
printf(" Write: %"PRIu64"\n\n", (uint64_t)base->total_write_time);
191192

192-
if (is_local_storage(log_page)) {
193-
printf("EC2 Instance Local Storage Performance Exceeded (us):\n");
194-
printf(" IOPS: %"PRIu64"\n",
195-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
196-
printf(" Throughput: %"PRIu64"\n\n",
197-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
198-
} else {
193+
if (!is_local_storage(log_page)) {
199194
printf("EBS Volume Performance Exceeded (us):\n");
200195
printf(" IOPS: %"PRIu64"\n", (uint64_t)base->ebs_volume_performance_exceeded_iops);
201196
printf(" Throughput: %"PRIu64"\n\n",
202197
(uint64_t)base->ebs_volume_performance_exceeded_tp);
203-
printf("EC2 Instance EBS Performance Exceeded (us):\n");
204-
printf(" IOPS: %"PRIu64"\n",
205-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
206-
printf(" Throughput: %"PRIu64"\n\n",
207-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
208-
209198
}
210199

200+
printf("%s Performance Exceeded (us):\n",
201+
is_local_storage(log_page) ?
202+
"EC2 Instance Local Storage" : "EC2 Instance EBS");
203+
printf(" IOPS: %"PRIu64"\n",
204+
(uint64_t)base->ec2_instance_performance_exceeded_iops);
205+
printf(" Throughput: %"PRIu64"\n\n",
206+
(uint64_t)base->ec2_instance_performance_exceeded_tp);
207+
211208
printf("Queue Length (point in time): %"PRIu64"\n\n",
212209
(uint64_t)base->volume_queue_length);
213210
}
@@ -321,10 +318,10 @@ static void amzn_json_add_io_stats(struct json_object *root,
321318
obj_add_uint64(root, "ebs_volume_performance_exceeded_tp",
322319
base->ebs_volume_performance_exceeded_tp);
323320
obj_add_uint64(root,
324-
"ec2_instance_ebs_performance_exceeded_iops",
325-
base->ec2_instance_ebs_performance_exceeded_iops);
326-
obj_add_uint64(root, "ec2_instance_ebs_performance_exceeded_tp",
327-
base->ec2_instance_ebs_performance_exceeded_tp);
321+
"ec2_instance_performance_exceeded_iops",
322+
base->ec2_instance_performance_exceeded_iops);
323+
obj_add_uint64(root, "ec2_instance_performance_exceeded_tp",
324+
base->ec2_instance_performance_exceeded_tp);
328325
obj_add_uint64(root, "volume_queue_length", base->volume_queue_length);
329326

330327
}
@@ -463,17 +460,105 @@ static void amzn_print_json_stats(struct amzn_latency_log_page *log, bool detail
463460
#define amzn_print_json_stats(log, detail)
464461
#endif /* CONFIG_JSONC */
465462

463+
/*
 * Loop-control flag for interval polling in get_stats(): non-zero while
 * polling should continue, cleared by amzn_sigint_handler().
 *
 * The object is written from a signal handler and read from normal code,
 * so it must be 'volatile sig_atomic_t'. Without 'volatile' the compiler
 * is allowed to assume the value cannot change between two reads.
 */
static volatile sig_atomic_t amzn_keep_polling = 1;

/*
 * Signal handler installed for SIGINT by get_stats(): asks the polling
 * loop to stop by clearing amzn_keep_polling. It performs nothing but the
 * single store to the flag.
 *
 * @sig: signal number (unused)
 */
static void amzn_sigint_handler(int sig)
{
	(void)sig;	/* handler behaves the same for any signal number */
	amzn_keep_polling = 0;
}
470+
471+
static void amzn_compute_histogram_diff(struct amzn_latency_histogram *diff,
472+
struct amzn_latency_histogram *curr,
473+
struct amzn_latency_histogram *prev)
474+
{
475+
diff->num_bins = curr->num_bins;
476+
for (int b = 0; b < curr->num_bins && b < 64; b++) {
477+
diff->bins[b].lower = curr->bins[b].lower;
478+
diff->bins[b].upper = curr->bins[b].upper;
479+
diff->bins[b].count = curr->bins[b].count - prev->bins[b].count;
480+
}
481+
}
482+
483+
static void amzn_compute_stats_diff(struct amzn_latency_log_page *diff,
484+
struct amzn_latency_log_page *curr,
485+
struct amzn_latency_log_page *prev)
486+
{
487+
struct amzn_latency_log_page_base *d = &diff->base;
488+
struct amzn_latency_log_page_base *c = &curr->base;
489+
struct amzn_latency_log_page_base *p = &prev->base;
490+
491+
d->magic = c->magic;
492+
d->version = c->version;
493+
d->total_read_ops = c->total_read_ops - p->total_read_ops;
494+
d->total_write_ops = c->total_write_ops - p->total_write_ops;
495+
d->total_read_bytes = c->total_read_bytes - p->total_read_bytes;
496+
d->total_write_bytes = c->total_write_bytes - p->total_write_bytes;
497+
d->total_read_time = c->total_read_time - p->total_read_time;
498+
d->total_write_time = c->total_write_time - p->total_write_time;
499+
d->ebs_volume_performance_exceeded_iops =
500+
c->ebs_volume_performance_exceeded_iops -
501+
p->ebs_volume_performance_exceeded_iops;
502+
d->ebs_volume_performance_exceeded_tp =
503+
c->ebs_volume_performance_exceeded_tp -
504+
p->ebs_volume_performance_exceeded_tp;
505+
d->ec2_instance_performance_exceeded_iops =
506+
c->ec2_instance_performance_exceeded_iops -
507+
p->ec2_instance_performance_exceeded_iops;
508+
d->ec2_instance_performance_exceeded_tp =
509+
c->ec2_instance_performance_exceeded_tp -
510+
p->ec2_instance_performance_exceeded_tp;
511+
512+
/* queue length is point-in-time, not cumulative */
513+
d->volume_queue_length = c->volume_queue_length;
514+
515+
amzn_compute_histogram_diff(&d->read_io_latency_histogram,
516+
&c->read_io_latency_histogram,
517+
&p->read_io_latency_histogram);
518+
amzn_compute_histogram_diff(&d->write_io_latency_histogram,
519+
&c->write_io_latency_histogram,
520+
&p->write_io_latency_histogram);
521+
522+
/* copy detail IO metadata from current */
523+
d->num_of_hists = c->num_of_hists;
524+
memcpy(d->hist_io_sizes, c->hist_io_sizes, sizeof(d->hist_io_sizes));
525+
526+
/* diff detail IO histogram counts */
527+
for (int i = 0; i < AMZN_NVME_STATS_NUM_HISTOGRAM; i++) {
528+
for (int b = 0; b < AMZN_NVME_STATS_NUM_HISTOGRAM_BINS; b++) {
529+
__u64 cr, pr, cw, pw;
530+
531+
cr = curr->detail_io.io_hist_array[i]
532+
.read_io_histogram_counts.counts[b];
533+
pr = prev->detail_io.io_hist_array[i]
534+
.read_io_histogram_counts.counts[b];
535+
cw = curr->detail_io.io_hist_array[i]
536+
.write_io_histogram_counts.counts[b];
537+
pw = prev->detail_io.io_hist_array[i]
538+
.write_io_histogram_counts.counts[b];
539+
540+
diff->detail_io.io_hist_array[i]
541+
.read_io_histogram_counts.counts[b] =
542+
cr - pr;
543+
diff->detail_io.io_hist_array[i]
544+
.write_io_histogram_counts.counts[b] =
545+
cw - pw;
546+
}
547+
}
548+
}
549+
466550
static int get_stats(int argc, char **argv, struct command *acmd,
467551
struct plugin *plugin)
468552
{
469553
const char *desc = "display command latency statistics";
470554
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
471555
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
472556
struct amzn_latency_log_page log = { 0 };
473-
nvme_print_flags_t flags = 0; // Initialize flags to 0
557+
nvme_print_flags_t flags = 0;
474558
struct nvme_passthru_cmd cmd;
475559
struct nvme_id_ctrl ctrl;
476560
bool detail = false;
561+
unsigned int interval = 0;
477562
size_t len;
478563
__u32 nsid = 1;
479564
int rc;
@@ -487,7 +572,10 @@ static int get_stats(int argc, char **argv, struct command *acmd,
487572
};
488573

489574
NVME_ARGS(opts,
490-
OPT_FLAG("details", 'd', &detail, "Detail IO histogram of each block size ranges"));
575+
OPT_FLAG("details", 'd', &detail,
576+
"Detail IO histogram of each block size ranges"),
577+
OPT_UINT("interval", 'i', &interval,
578+
"Polling interval in seconds"));
491579

492580
rc = parse_and_open(&ctx, &hdl, argc, argv, desc, opts);
493581
if (rc)
@@ -538,10 +626,60 @@ static int get_stats(int argc, char **argv, struct command *acmd,
538626
goto done;
539627
}
540628

541-
if (flags & JSON)
542-
amzn_print_json_stats(&log, detail);
543-
else
544-
amzn_print_normal_stats(&log, detail);
629+
if (interval > 0) {
630+
struct amzn_latency_log_page prev, curr, diff;
631+
struct sigaction sa = { .sa_handler = amzn_sigint_handler };
632+
633+
sigemptyset(&sa.sa_mask);
634+
sigaction(SIGINT, &sa, NULL);
635+
636+
printf("Polling NVMe stats every %u sec(s);"
637+
" press Ctrl+C to stop\n\n",
638+
interval);
639+
640+
/* print the initial absolute snapshot */
641+
if (flags & JSON)
642+
amzn_print_json_stats(&log, detail);
643+
else
644+
amzn_print_normal_stats(&log, detail);
645+
646+
prev = log;
647+
amzn_keep_polling = 1;
648+
649+
while (amzn_keep_polling) {
650+
sleep(interval);
651+
if (!amzn_keep_polling)
652+
break;
653+
654+
memset(&curr, 0, sizeof(curr));
655+
nvme_init_get_log(&cmd, nsid,
656+
AMZN_NVME_STATS_LOGPAGE_ID,
657+
NVME_CSI_NVM, &curr, len);
658+
rc = nvme_get_log(hdl, &cmd, false,
659+
NVME_LOG_PAGE_PDU_SIZE);
660+
if (rc != 0) {
661+
nvme_show_error("get log page failed, rc=%d",
662+
rc);
663+
goto done;
664+
}
665+
666+
memset(&diff, 0, sizeof(diff));
667+
amzn_compute_stats_diff(&diff, &curr, &prev);
668+
669+
if (flags & JSON)
670+
amzn_print_json_stats(&diff, detail);
671+
else
672+
amzn_print_normal_stats(&diff, detail);
673+
674+
prev = curr;
675+
printf("\n");
676+
}
677+
} else {
678+
if (flags & JSON)
679+
amzn_print_json_stats(&log, detail);
680+
else
681+
amzn_print_normal_stats(&log, detail);
682+
}
545683

546684
done:
547685
return rc;

0 commit comments

Comments
 (0)