11// SPDX-License-Identifier: GPL-2.0
22/*
3- * delaytop.c - task delay monitoring tool.
3+ * delaytop.c - system-wide delay monitoring tool.
44 *
55 * This tool provides real-time monitoring and statistics of
66 * system, container, and task-level delays, including CPU,
7- * memory, IO, and IRQ and delay accounting. It supports both
8- * interactive (top-like), and can output delay information
9- * for the whole system, specific containers (cgroups), or
10- * individual tasks (PIDs).
7+ * memory, IO, and IRQ. It supports both interactive (top-like),
8+ * and can output delay information for the whole system, specific
9+ * containers (cgroups), or individual tasks (PIDs).
1110 *
1211 * Key features:
1312 * - Collects per-task delay accounting statistics via taskstats.
13+ * - Collects system-wide PSI information.
1414 * - Supports sorting, filtering.
1515 * - Supports both interactive (screen refresh).
1616 *
3232#include <time.h>
3333#include <dirent.h>
3434#include <ctype.h>
35+ #include <stdbool.h>
3536#include <sys/types.h>
3637#include <sys/stat.h>
3738#include <sys/socket.h>
4142#include <linux/genetlink.h>
4243#include <linux/taskstats.h>
4344#include <linux/cgroupstats.h>
44- #include <ncurses.h>
4545
4646#define PSI_CPU_SOME "/proc/pressure/cpu"
4747#define PSI_CPU_FULL "/proc/pressure/cpu"
6262#define MAX_MSG_SIZE 1024
6363#define MAX_TASKS 1000
6464#define SET_TASK_STAT (task_count , field ) tasks[task_count].field = stats.field
65+ #define BOOL_FPRINT (stream , fmt , ...) \
66+ ({ \
67+ int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
68+ ret >= 0; \
69+ })
70+ #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
6571
6672/* Program settings structure */
6773struct config {
@@ -262,6 +268,7 @@ static int create_nl_socket(void)
262268 local .nl_family = AF_NETLINK ;
263269
264270 if (bind (fd , (struct sockaddr * ) & local , sizeof (local )) < 0 ) {
271+ fprintf (stderr , "Failed to bind socket when create nl_socket\n" );
265272 close (fd );
266273 return -1 ;
267274 }
@@ -332,13 +339,17 @@ static int get_family_id(int sd)
332339 rc = send_cmd (sd , GENL_ID_CTRL , getpid (), CTRL_CMD_GETFAMILY ,
333340 CTRL_ATTR_FAMILY_NAME , (void * )name ,
334341 strlen (TASKSTATS_GENL_NAME )+ 1 );
335- if (rc < 0 )
342+ if (rc < 0 ) {
343+ fprintf (stderr , "Failed to send cmd for family id\n" );
336344 return 0 ;
345+ }
337346
338347 rep_len = recv (sd , & ans , sizeof (ans ), 0 );
339348 if (ans .n .nlmsg_type == NLMSG_ERROR ||
340- (rep_len < 0 ) || !NLMSG_OK ((& ans .n ), rep_len ))
349+ (rep_len < 0 ) || !NLMSG_OK ((& ans .n ), rep_len )) {
350+ fprintf (stderr , "Failed to receive response for family id\n" );
341351 return 0 ;
352+ }
342353
343354 na = (struct nlattr * ) GENLMSG_DATA (& ans );
344355 na = (struct nlattr * ) ((char * ) na + NLA_ALIGN (na -> nla_len ));
@@ -433,26 +444,30 @@ static void read_psi_stats(void)
433444static int read_comm (int pid , char * comm_buf , size_t buf_size )
434445{
435446 char path [64 ];
447+ int ret = -1 ;
436448 size_t len ;
437449 FILE * fp ;
438450
439451 snprintf (path , sizeof (path ), "/proc/%d/comm" , pid );
440452 fp = fopen (path , "r" );
441- if (!fp )
442- return -1 ;
453+ if (!fp ) {
454+ fprintf (stderr , "Failed to open comm file /proc/%d/comm\n" , pid );
455+ return ret ;
456+ }
457+
443458 if (fgets (comm_buf , buf_size , fp )) {
444459 len = strlen (comm_buf );
445460 if (len > 0 && comm_buf [len - 1 ] == '\n' )
446461 comm_buf [len - 1 ] = '\0' ;
447- } else {
448- fclose (fp );
449- return -1 ;
462+ ret = 0 ;
450463 }
464+
451465 fclose (fp );
452- return 0 ;
466+
467+ return ret ;
453468}
454469
455- static int fetch_and_fill_task_info (int pid , const char * comm )
470+ static void fetch_and_fill_task_info (int pid , const char * comm )
456471{
457472 struct {
458473 struct nlmsghdr n ;
@@ -466,13 +481,21 @@ static int fetch_and_fill_task_info(int pid, const char *comm)
466481 int nl_len ;
467482 int rc ;
468483
484+ /* Send request for task stats */
469485 if (send_cmd (nl_sd , family_id , getpid (), TASKSTATS_CMD_GET ,
470486 TASKSTATS_CMD_ATTR_PID , & pid , sizeof (pid )) < 0 ) {
471- return -1 ;
487+ fprintf (stderr , "Failed to send request for task stats\n" );
488+ return ;
472489 }
490+
491+ /* Receive response */
473492 rc = recv (nl_sd , & resp , sizeof (resp ), 0 );
474- if (rc < 0 || resp .n .nlmsg_type == NLMSG_ERROR )
475- return -1 ;
493+ if (rc < 0 || resp .n .nlmsg_type == NLMSG_ERROR ) {
494+ fprintf (stderr , "Failed to receive response for task stats\n" );
495+ return ;
496+ }
497+
498+ /* Parse response */
476499 nl_len = GENLMSG_PAYLOAD (& resp .n );
477500 na = (struct nlattr * ) GENLMSG_DATA (& resp );
478501 while (nl_len > 0 ) {
@@ -515,7 +538,7 @@ static int fetch_and_fill_task_info(int pid, const char *comm)
515538 nl_len -= NLA_ALIGN (na -> nla_len );
516539 na = NLA_NEXT (na );
517540 }
518- return 0 ;
541+ return ;
519542}
520543
521544static void get_task_delays (void )
@@ -654,54 +677,82 @@ static void display_results(void)
654677{
655678 time_t now = time (NULL );
656679 struct tm * tm_now = localtime (& now );
680+ FILE * out = stdout ;
657681 char timestamp [32 ];
682+ bool suc = true;
658683 int i , count ;
659- FILE * out = stdout ;
660684
661- fprintf (out , "\033[H\033[J" );
685+ /* Clear terminal screen */
686+ suc &= BOOL_FPRINT (out , "\033[H\033[J" );
687+
662688 /* PSI output (one-line, no cat style) */
663- fprintf (out , "System Pressure Information: " );
664- fprintf (out , "(avg10/avg60/avg300/total)\n" );
665- fprintf (out , "CPU:" );
666- fprintf (out , " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu" , psi .cpu_full_avg10 ,
667- psi .cpu_full_avg60 , psi .cpu_full_avg300 , psi .cpu_full_total );
668- fprintf (out , " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n" , psi .cpu_some_avg10 ,
669- psi .cpu_some_avg60 , psi .cpu_some_avg300 , psi .cpu_some_total );
670-
671- fprintf (out , "Memory:" );
672- fprintf (out , " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu" , psi .memory_full_avg10 ,
673- psi .memory_full_avg60 , psi .memory_full_avg300 , psi .memory_full_total );
674- fprintf (out , " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n" , psi .memory_some_avg10 ,
675- psi .memory_some_avg60 , psi .memory_some_avg300 , psi .memory_some_total );
676-
677- fprintf (out , "IO:" );
678- fprintf (out , " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu" , psi .io_full_avg10 ,
679- psi .io_full_avg60 , psi .io_full_avg300 , psi .io_full_total );
680- fprintf (out , " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n" , psi .io_some_avg10 ,
681- psi .io_some_avg60 , psi .io_some_avg300 , psi .io_some_total );
682- fprintf (out , "IRQ:" );
683- fprintf (out , " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n" , psi .irq_full_avg10 ,
684- psi .irq_full_avg60 , psi .irq_full_avg300 , psi .irq_full_total );
689+ suc &= BOOL_FPRINT (out , "System Pressure Information: (avg10/avg60/avg300/total)\n" );
690+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
691+ "CPU some:" ,
692+ psi .cpu_some_avg10 ,
693+ psi .cpu_some_avg60 ,
694+ psi .cpu_some_avg300 ,
695+ psi .cpu_some_total / 1000 );
696+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
697+ "CPU full:" ,
698+ psi .cpu_full_avg10 ,
699+ psi .cpu_full_avg60 ,
700+ psi .cpu_full_avg300 ,
701+ psi .cpu_full_total / 1000 );
702+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
703+ "Memory full:" ,
704+ psi .memory_full_avg10 ,
705+ psi .memory_full_avg60 ,
706+ psi .memory_full_avg300 ,
707+ psi .memory_full_total / 1000 );
708+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
709+ "Memory some:" ,
710+ psi .memory_some_avg10 ,
711+ psi .memory_some_avg60 ,
712+ psi .memory_some_avg300 ,
713+ psi .memory_some_total / 1000 );
714+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
715+ "IO full:" ,
716+ psi .io_full_avg10 ,
717+ psi .io_full_avg60 ,
718+ psi .io_full_avg300 ,
719+ psi .io_full_total / 1000 );
720+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
721+ "IO some:" ,
722+ psi .io_some_avg10 ,
723+ psi .io_some_avg60 ,
724+ psi .io_some_avg300 ,
725+ psi .io_some_total / 1000 );
726+ suc &= BOOL_FPRINT (out , PSI_LINE_FORMAT ,
727+ "IRQ full:" ,
728+ psi .irq_full_avg10 ,
729+ psi .irq_full_avg60 ,
730+ psi .irq_full_avg300 ,
731+ psi .irq_full_total / 1000 );
732+
685733 if (cfg .container_path ) {
686- fprintf (out , "Container Information (%s):\n" , cfg .container_path );
687- fprintf (out , "Processes: running=%d, sleeping=%d, " ,
734+ suc &= BOOL_FPRINT (out , "Container Information (%s):\n" , cfg .container_path );
735+ suc &= BOOL_FPRINT (out , "Processes: running=%d, sleeping=%d, " ,
688736 container_stats .nr_running , container_stats .nr_sleeping );
689- fprintf (out , "stopped=%d, uninterruptible=%d, io_wait=%d\n\n" ,
737+ suc &= BOOL_FPRINT (out , "stopped=%d, uninterruptible=%d, io_wait=%d\n\n" ,
690738 container_stats .nr_stopped , container_stats .nr_uninterruptible ,
691739 container_stats .nr_io_wait );
692740 }
693- fprintf (out , "Top %d processes (sorted by CPU delay):\n \n" ,
741+ suc &= BOOL_FPRINT (out , "Top %d processes (sorted by CPU delay):\n" ,
694742 cfg .max_processes );
695- fprintf (out , " PID TGID COMMAND CPU(ms) IO(ms) " );
696- fprintf (out , "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n" );
697- fprintf (out , "-----------------------------------------------" );
698- fprintf (out , "----------------------------------------------\n" );
743+ suc &= BOOL_FPRINT (out , "%5s %5s %-17s" , "PID" , "TGID" , "COMMAND" );
744+ suc &= BOOL_FPRINT (out , "%7s %7s %7s %7s %7s %7s %7s %7s\n" ,
745+ "CPU(ms)" , "IO(ms)" , "SWAP(ms)" , "RCL(ms)" ,
746+ "THR(ms)" , "CMP(ms)" , "WP(ms)" , "IRQ(ms)" );
747+
748+ suc &= BOOL_FPRINT (out , "-----------------------------------------------" );
749+ suc &= BOOL_FPRINT (out , "----------------------------------------------\n" );
699750 count = task_count < cfg .max_processes ? task_count : cfg .max_processes ;
700751
701752 for (i = 0 ; i < count ; i ++ ) {
702- fprintf (out , "%5d %5d %-15s " ,
753+ suc &= BOOL_FPRINT (out , "%5d %5d %-15s" ,
703754 tasks [i ].pid , tasks [i ].tgid , tasks [i ].command );
704- fprintf (out , "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n" ,
755+ suc &= BOOL_FPRINT (out , "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n" ,
705756 average_ms (tasks [i ].cpu_delay_total , tasks [i ].cpu_count ),
706757 average_ms (tasks [i ].blkio_delay_total , tasks [i ].blkio_count ),
707758 average_ms (tasks [i ].swapin_delay_total , tasks [i ].swapin_count ),
@@ -712,7 +763,10 @@ static void display_results(void)
712763 average_ms (tasks [i ].irq_delay_total , tasks [i ].irq_count ));
713764 }
714765
715- fprintf (out , "\n" );
766+ suc &= BOOL_FPRINT (out , "\n" );
767+
768+ if (!suc )
769+ perror ("Error writing to output" );
716770}
717771
718772/* Main function */
0 commit comments