Skip to content

Commit 9bcdb34

Browse files
committed
amzn: Add interval polling support for local storage stats
Add -i/--interval option to the stats command that enables periodic polling of NVMe statistics for local storage devices. When enabled, the command queries stats at the specified interval and prints the delta between consecutive snapshots. Ctrl+C stops the polling loop. This feature is restricted to local storage devices only; using it on EBS devices returns an error. The EBS code path is unchanged. Also rename ec2_instance_ebs_performance_exceeded fields to ec2_instance_performance_exceeded since they are used by both EBS and local storage devices, and deduplicate the associated print statements.
1 parent 32f4203 commit 9bcdb34

1 file changed

Lines changed: 149 additions & 25 deletions

File tree

plugins/amzn/amzn-nvme.c

Lines changed: 149 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0-or-later
22
#include <fcntl.h>
33
#include <errno.h>
4+
#include <signal.h>
45
#include <stdio.h>
56
#include <stdlib.h>
67
#include <unistd.h>
@@ -73,8 +74,8 @@ struct amzn_latency_log_page_base {
7374
__u64 total_write_time;
7475
__u64 ebs_volume_performance_exceeded_iops;
7576
__u64 ebs_volume_performance_exceeded_tp;
76-
__u64 ec2_instance_ebs_performance_exceeded_iops;
77-
__u64 ec2_instance_ebs_performance_exceeded_tp;
77+
__u64 ec2_instance_performance_exceeded_iops;
78+
__u64 ec2_instance_performance_exceeded_tp;
7879
__u64 volume_queue_length;
7980
__u8 reserved1[416];
8081

@@ -189,25 +190,21 @@ static void amzn_print_io_stats(struct amzn_latency_log_page *log_page)
189190
printf(" Read: %"PRIu64"\n", (uint64_t)base->total_read_time);
190191
printf(" Write: %"PRIu64"\n\n", (uint64_t)base->total_write_time);
191192

192-
if (is_local_storage(log_page)) {
193-
printf("EC2 Instance Local Storage Performance Exceeded (us):\n");
194-
printf(" IOPS: %"PRIu64"\n",
195-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
196-
printf(" Throughput: %"PRIu64"\n\n",
197-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
198-
} else {
193+
if (!is_local_storage(log_page)) {
199194
printf("EBS Volume Performance Exceeded (us):\n");
200195
printf(" IOPS: %"PRIu64"\n", (uint64_t)base->ebs_volume_performance_exceeded_iops);
201196
printf(" Throughput: %"PRIu64"\n\n",
202197
(uint64_t)base->ebs_volume_performance_exceeded_tp);
203-
printf("EC2 Instance EBS Performance Exceeded (us):\n");
204-
printf(" IOPS: %"PRIu64"\n",
205-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_iops);
206-
printf(" Throughput: %"PRIu64"\n\n",
207-
(uint64_t)base->ec2_instance_ebs_performance_exceeded_tp);
208-
209198
}
210199

200+
printf("%s Performance Exceeded (us):\n",
201+
is_local_storage(log_page) ?
202+
"EC2 Instance Local Storage" : "EC2 Instance EBS");
203+
printf(" IOPS: %"PRIu64"\n",
204+
(uint64_t)base->ec2_instance_performance_exceeded_iops);
205+
printf(" Throughput: %"PRIu64"\n\n",
206+
(uint64_t)base->ec2_instance_performance_exceeded_tp);
207+
211208
printf("Queue Length (point in time): %"PRIu64"\n\n",
212209
(uint64_t)base->volume_queue_length);
213210
}
@@ -321,10 +318,10 @@ static void amzn_json_add_io_stats(struct json_object *root,
321318
obj_add_uint64(root, "ebs_volume_performance_exceeded_tp",
322319
base->ebs_volume_performance_exceeded_tp);
323320
obj_add_uint64(root,
324-
"ec2_instance_ebs_performance_exceeded_iops",
325-
base->ec2_instance_ebs_performance_exceeded_iops);
326-
obj_add_uint64(root, "ec2_instance_ebs_performance_exceeded_tp",
327-
base->ec2_instance_ebs_performance_exceeded_tp);
321+
"ec2_instance_performance_exceeded_iops",
322+
base->ec2_instance_performance_exceeded_iops);
323+
obj_add_uint64(root, "ec2_instance_performance_exceeded_tp",
324+
base->ec2_instance_performance_exceeded_tp);
328325
obj_add_uint64(root, "volume_queue_length", base->volume_queue_length);
329326

330327
}
@@ -463,17 +460,93 @@ static void amzn_print_json_stats(struct amzn_latency_log_page *log, bool detail
463460
#define amzn_print_json_stats(log, detail)
464461
#endif /* CONFIG_JSONC */
465462

463+
/*
 * Loop-control flag for the interval polling loop in get_stats(): non-zero
 * means "keep polling". It is cleared from amzn_sigint_handler(), hence the
 * volatile sig_atomic_t type.
 */
static volatile sig_atomic_t amzn_keep_polling = 1;
464+
465+
static void amzn_sigint_handler(int sig)
466+
{
467+
(void)sig;
468+
amzn_keep_polling = 0;
469+
}
470+
471+
static void amzn_compute_histogram_diff(struct amzn_latency_histogram *diff,
472+
struct amzn_latency_histogram *curr,
473+
struct amzn_latency_histogram *prev)
474+
{
475+
diff->num_bins = curr->num_bins;
476+
for (int b = 0; b < curr->num_bins && b < 64; b++) {
477+
diff->bins[b].lower = curr->bins[b].lower;
478+
diff->bins[b].upper = curr->bins[b].upper;
479+
diff->bins[b].count = curr->bins[b].count - prev->bins[b].count;
480+
}
481+
}
482+
483+
static void amzn_compute_stats_diff(struct amzn_latency_log_page *diff,
484+
struct amzn_latency_log_page *curr,
485+
struct amzn_latency_log_page *prev)
486+
{
487+
struct amzn_latency_log_page_base *d = &diff->base;
488+
struct amzn_latency_log_page_base *c = &curr->base;
489+
struct amzn_latency_log_page_base *p = &prev->base;
490+
491+
d->magic = c->magic;
492+
d->version = c->version;
493+
d->total_read_ops = c->total_read_ops - p->total_read_ops;
494+
d->total_write_ops = c->total_write_ops - p->total_write_ops;
495+
d->total_read_bytes = c->total_read_bytes - p->total_read_bytes;
496+
d->total_write_bytes = c->total_write_bytes - p->total_write_bytes;
497+
d->total_read_time = c->total_read_time - p->total_read_time;
498+
d->total_write_time = c->total_write_time - p->total_write_time;
499+
d->ebs_volume_performance_exceeded_iops =
500+
c->ebs_volume_performance_exceeded_iops - p->ebs_volume_performance_exceeded_iops;
501+
d->ebs_volume_performance_exceeded_tp =
502+
c->ebs_volume_performance_exceeded_tp - p->ebs_volume_performance_exceeded_tp;
503+
d->ec2_instance_performance_exceeded_iops =
504+
c->ec2_instance_performance_exceeded_iops -
505+
p->ec2_instance_performance_exceeded_iops;
506+
d->ec2_instance_performance_exceeded_tp =
507+
c->ec2_instance_performance_exceeded_tp -
508+
p->ec2_instance_performance_exceeded_tp;
509+
510+
/* queue length is point-in-time, not cumulative */
511+
d->volume_queue_length = c->volume_queue_length;
512+
513+
amzn_compute_histogram_diff(&d->read_io_latency_histogram,
514+
&c->read_io_latency_histogram,
515+
&p->read_io_latency_histogram);
516+
amzn_compute_histogram_diff(&d->write_io_latency_histogram,
517+
&c->write_io_latency_histogram,
518+
&p->write_io_latency_histogram);
519+
520+
/* copy detail IO metadata from current */
521+
d->num_of_hists = c->num_of_hists;
522+
memcpy(d->hist_io_sizes, c->hist_io_sizes, sizeof(d->hist_io_sizes));
523+
524+
/* diff detail IO histogram counts */
525+
for (int i = 0; i < AMZN_NVME_STATS_NUM_HISTOGRAM; i++) {
526+
for (int b = 0; b < AMZN_NVME_STATS_NUM_HISTOGRAM_BINS; b++) {
527+
diff->detail_io.io_hist_array[i].read_io_histogram_counts.counts[b] =
528+
curr->detail_io.io_hist_array[i].read_io_histogram_counts.counts[b] -
529+
prev->detail_io.io_hist_array[i].read_io_histogram_counts.counts[b];
530+
diff->detail_io.io_hist_array[i].write_io_histogram_counts.counts[b] =
531+
curr->detail_io.io_hist_array[i].write_io_histogram_counts.counts[b] -
532+
prev->detail_io.io_hist_array[i].write_io_histogram_counts.counts[b];
533+
}
534+
}
535+
}
536+
466537
static int get_stats(int argc, char **argv, struct command *acmd,
467538
struct plugin *plugin)
468539
{
469540
const char *desc = "display command latency statistics";
470541
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
471542
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
472543
struct amzn_latency_log_page log = { 0 };
473-
nvme_print_flags_t flags = 0; // Initialize flags to 0
544+
nvme_print_flags_t flags = 0;
474545
struct nvme_passthru_cmd cmd;
475546
struct nvme_id_ctrl ctrl;
547+
bool local_storage = false;
476548
bool detail = false;
549+
unsigned int interval = 0;
477550
size_t len;
478551
__u32 nsid = 1;
479552
int rc;
@@ -487,7 +560,8 @@ static int get_stats(int argc, char **argv, struct command *acmd,
487560
};
488561

489562
NVME_ARGS(opts,
490-
OPT_FLAG("details", 'd', &detail, "Detail IO histogram of each block size ranges"));
563+
OPT_FLAG("details", 'd', &detail, "Detail IO histogram of each block size ranges"),
564+
OPT_UINT("interval", 'i', &interval, "Polling interval in seconds (local storage only)"));
491565

492566
rc = parse_and_open(&ctx, &hdl, argc, argv, desc, opts);
493567
if (rc)
@@ -501,6 +575,7 @@ static int get_stats(int argc, char **argv, struct command *acmd,
501575

502576
if (!strncmp((char *)ctrl.mn, AMZN_NVME_LOCAL_STORAGE_PREFIX,
503577
strlen(AMZN_NVME_LOCAL_STORAGE_PREFIX))) {
578+
local_storage = true;
504579
if (nvme_get_nsid(hdl, &nsid) < 0) {
505580
struct nvme_id_ctrl test_ctrl;
506581

@@ -516,6 +591,13 @@ static int get_stats(int argc, char **argv, struct command *acmd,
516591
len = sizeof(log.base);
517592
}
518593

594+
if (interval > 0 && !local_storage) {
595+
fprintf(stderr, "[ERROR] %s: Interval polling is only supported for local storage devices\n",
596+
__func__);
597+
rc = -EINVAL;
598+
goto done;
599+
}
600+
519601
nvme_init_get_log(&cmd, nsid, AMZN_NVME_STATS_LOGPAGE_ID, NVME_CSI_NVM,
520602
&log, len);
521603
rc = nvme_get_log(hdl, &cmd, false, NVME_LOG_PAGE_PDU_SIZE);
@@ -538,10 +620,52 @@ static int get_stats(int argc, char **argv, struct command *acmd,
538620
goto done;
539621
}
540622

541-
if (flags & JSON)
542-
amzn_print_json_stats(&log, detail);
543-
else
544-
amzn_print_normal_stats(&log, detail);
623+
if (interval > 0) {
624+
struct amzn_latency_log_page prev, curr, diff;
625+
struct sigaction sa = { .sa_handler = amzn_sigint_handler };
626+
627+
sigemptyset(&sa.sa_mask);
628+
sigaction(SIGINT, &sa, NULL);
629+
630+
printf("Polling NVMe stats every %u sec(s); press Ctrl+C to stop\n\n",
631+
interval);
632+
633+
/* first snapshot becomes the baseline */
634+
prev = log;
635+
amzn_keep_polling = 1;
636+
637+
while (amzn_keep_polling) {
638+
sleep(interval);
639+
if (!amzn_keep_polling)
640+
break;
641+
642+
memset(&curr, 0, sizeof(curr));
643+
nvme_init_get_log(&cmd, nsid, AMZN_NVME_STATS_LOGPAGE_ID,
644+
NVME_CSI_NVM, &curr, len);
645+
rc = nvme_get_log(hdl, &cmd, false, NVME_LOG_PAGE_PDU_SIZE);
646+
if (rc != 0) {
647+
fprintf(stderr, "[ERROR] %s: Failed to get log page, rc = %d\n",
648+
__func__, rc);
649+
goto done;
650+
}
651+
652+
memset(&diff, 0, sizeof(diff));
653+
amzn_compute_stats_diff(&diff, &curr, &prev);
654+
655+
if (flags & JSON)
656+
amzn_print_json_stats(&diff, detail);
657+
else
658+
amzn_print_normal_stats(&diff, detail);
659+
660+
prev = curr;
661+
printf("\n");
662+
}
663+
} else {
664+
if (flags & JSON)
665+
amzn_print_json_stats(&log, detail);
666+
else
667+
amzn_print_normal_stats(&log, detail);
668+
}
545669

546670
done:
547671
return rc;

0 commit comments

Comments
 (0)