Skip to content

Commit aa1abb6

Browse files
committed
micron: add smart-log and id-ctrl commands
Add smart-log command to retrieve SMART/Health log with vendor-specific OLEC and IPM fields. Add id-ctrl command to display Identify Controller with vendor-specific PMS, IPMSR, and MSMT fields. Signed-off-by: Swapna Samoju <[email protected]>
1 parent 85154a1 commit aa1abb6

2 files changed

Lines changed: 333 additions & 0 deletions

File tree

plugins/micron/micron-nvme.c

Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <string.h>
1414
#include <ctype.h>
1515
#include <stdlib.h>
16+
#include <inttypes.h>
1617
#include <errno.h>
1718
#include <fcntl.h>
1819
#include <unistd.h>
@@ -30,6 +31,7 @@
3031
#include "nvme-print.h"
3132
#include "util/cleanup.h"
3233
#include "util/utils.h"
34+
#include "util/types.h"
3335

3436
#define CREATE_CMD
3537
#include "micron-nvme.h"
@@ -4208,3 +4210,331 @@ static int micron_cloud_log(int argc, char **argv, struct command *acmd,
42084210
nvme_show_status(err);
42094211
return err;
42104212
}
4213+
4214+
/* Extended SMART log structure with Micron-specific fields in reserved area */
4215+
struct micron_smart_log_ext {
4216+
struct nvme_smart_log base;
4217+
/* Access vendor-specific fields via rsvd232 overlay */
4218+
};
4219+
4220+
/*
 * Micron vendor fields inside the reserved area of the SMART/Health log.
 * Offsets are relative to the start of rsvd232[] (log page byte 232):
 *   OLEC: log page bytes 232-239 (rsvd232[0..7])
 *   IPM:  log page bytes 240-243 (rsvd232[8..11])
 */
#define SMART_OLEC_OFFSET	0
#define SMART_IPM_OFFSET	8
4226+
4227+
static inline __u64 get_smart_olec(struct nvme_smart_log *smart)
4228+
{
4229+
return le64_to_cpu(*(__le64 *)&smart->rsvd232[SMART_OLEC_OFFSET]);
4230+
}
4231+
4232+
static inline __u32 get_smart_ipm(struct nvme_smart_log *smart)
4233+
{
4234+
return le32_to_cpu(*(__le32 *)&smart->rsvd232[SMART_IPM_OFFSET]);
4235+
}
4236+
4237+
static void print_micron_health_log_normal(struct nvme_smart_log *smart,
4238+
const char *devname)
4239+
{
4240+
__u16 temp = smart->temperature[1] << 8 | smart->temperature[0];
4241+
__u64 olec = get_smart_olec(smart);
4242+
__u32 ipm = get_smart_ipm(smart);
4243+
int i;
4244+
4245+
printf("SMART/Health Information Log for %s\n", devname);
4246+
printf("========================================\n");
4247+
4248+
printf("Critical Warning : 0x%02x\n",
4249+
smart->critical_warning);
4250+
if (smart->critical_warning) {
4251+
if (smart->critical_warning & 0x01)
4252+
printf(" - Available spare below threshold\n");
4253+
if (smart->critical_warning & 0x02)
4254+
printf(" - Temperature threshold exceeded\n");
4255+
if (smart->critical_warning & 0x04)
4256+
printf(" - NVM subsystem reliability degraded\n");
4257+
if (smart->critical_warning & 0x08)
4258+
printf(" - Media placed in read-only mode\n");
4259+
if (smart->critical_warning & 0x10)
4260+
printf(" - Volatile memory backup failed\n");
4261+
if (smart->critical_warning & 0x20)
4262+
printf(" - PMR read-only or unreliable\n");
4263+
}
4264+
4265+
printf("Composite Temperature : %u K (%d C)\n",
4266+
temp, temp ? temp - 273 : 0);
4267+
printf("Available Spare : %u%%\n", smart->avail_spare);
4268+
printf("Available Spare Threshold : %u%%\n", smart->spare_thresh);
4269+
printf("Percentage Used : %u%%\n", smart->percent_used);
4270+
printf("Endurance Grp Critical Warn : 0x%02x\n",
4271+
smart->endu_grp_crit_warn_sumry);
4272+
4273+
printf("Data Units Read : %s\n",
4274+
uint128_t_to_string(le128_to_cpu(smart->data_units_read)));
4275+
printf("Data Units Written : %s\n",
4276+
uint128_t_to_string(le128_to_cpu(smart->data_units_written)));
4277+
printf("Host Read Commands : %s\n",
4278+
uint128_t_to_string(le128_to_cpu(smart->host_reads)));
4279+
printf("Host Write Commands : %s\n",
4280+
uint128_t_to_string(le128_to_cpu(smart->host_writes)));
4281+
printf("Controller Busy Time : %s min\n",
4282+
uint128_t_to_string(le128_to_cpu(smart->ctrl_busy_time)));
4283+
printf("Power Cycles : %s\n",
4284+
uint128_t_to_string(le128_to_cpu(smart->power_cycles)));
4285+
printf("Power On Hours : %s\n",
4286+
uint128_t_to_string(le128_to_cpu(smart->power_on_hours)));
4287+
printf("Unsafe Shutdowns : %s\n",
4288+
uint128_t_to_string(le128_to_cpu(smart->unsafe_shutdowns)));
4289+
printf("Media Errors : %s\n",
4290+
uint128_t_to_string(le128_to_cpu(smart->media_errors)));
4291+
printf("Num Error Log Entries : %s\n",
4292+
uint128_t_to_string(le128_to_cpu(smart->num_err_log_entries)));
4293+
4294+
printf("Warning Comp Temp Time : %u min\n",
4295+
le32_to_cpu(smart->warning_temp_time));
4296+
printf("Critical Comp Temp Time : %u min\n",
4297+
le32_to_cpu(smart->critical_comp_time));
4298+
4299+
for (i = 0; i < 8; i++) {
4300+
__u16 ts = le16_to_cpu(smart->temp_sensor[i]);
4301+
4302+
if (ts)
4303+
printf("Temperature Sensor %d : %u K (%d C)\n",
4304+
i + 1, ts, ts - 273);
4305+
}
4306+
4307+
printf("Thm Temp 1 Trans Count : %u\n",
4308+
le32_to_cpu(smart->thm_temp1_trans_count));
4309+
printf("Thm Temp 2 Trans Count : %u\n",
4310+
le32_to_cpu(smart->thm_temp2_trans_count));
4311+
printf("Thm Temp 1 Total Time : %u sec\n",
4312+
le32_to_cpu(smart->thm_temp1_total_time));
4313+
printf("Thm Temp 2 Total Time : %u sec\n",
4314+
le32_to_cpu(smart->thm_temp2_total_time));
4315+
4316+
/* Micron-specific extended fields */
4317+
printf("OLEC (Energy) : %llu\n",
4318+
(unsigned long long)olec);
4319+
printf("Interval Power Measurement : %u\n", ipm);
4320+
}
4321+
4322+
static void print_micron_health_log_json(struct nvme_smart_log *smart,
4323+
const char *devname)
4324+
{
4325+
__u16 temp = smart->temperature[1] << 8 | smart->temperature[0];
4326+
__u64 olec = get_smart_olec(smart);
4327+
__u32 ipm = get_smart_ipm(smart);
4328+
struct json_object *root;
4329+
int i;
4330+
4331+
root = json_create_object();
4332+
4333+
json_object_add_value_string(root, "device", devname);
4334+
json_object_add_value_int(root, "critical_warning",
4335+
smart->critical_warning);
4336+
json_object_add_value_int(root, "temperature_kelvin", temp);
4337+
json_object_add_value_int(root, "temperature_celsius",
4338+
temp ? temp - 273 : 0);
4339+
json_object_add_value_int(root, "avail_spare", smart->avail_spare);
4340+
json_object_add_value_int(root, "spare_thresh", smart->spare_thresh);
4341+
json_object_add_value_int(root, "percent_used", smart->percent_used);
4342+
json_object_add_value_int(root, "endurance_grp_crit_warn",
4343+
smart->endu_grp_crit_warn_sumry);
4344+
4345+
json_object_add_value_string(root, "data_units_read",
4346+
uint128_t_to_string(le128_to_cpu(smart->data_units_read)));
4347+
json_object_add_value_string(root, "data_units_written",
4348+
uint128_t_to_string(le128_to_cpu(smart->data_units_written)));
4349+
json_object_add_value_string(root, "host_reads",
4350+
uint128_t_to_string(le128_to_cpu(smart->host_reads)));
4351+
json_object_add_value_string(root, "host_writes",
4352+
uint128_t_to_string(le128_to_cpu(smart->host_writes)));
4353+
json_object_add_value_string(root, "ctrl_busy_time",
4354+
uint128_t_to_string(le128_to_cpu(smart->ctrl_busy_time)));
4355+
json_object_add_value_string(root, "power_cycles",
4356+
uint128_t_to_string(le128_to_cpu(smart->power_cycles)));
4357+
json_object_add_value_string(root, "power_on_hours",
4358+
uint128_t_to_string(le128_to_cpu(smart->power_on_hours)));
4359+
json_object_add_value_string(root, "unsafe_shutdowns",
4360+
uint128_t_to_string(le128_to_cpu(smart->unsafe_shutdowns)));
4361+
json_object_add_value_string(root, "media_errors",
4362+
uint128_t_to_string(le128_to_cpu(smart->media_errors)));
4363+
json_object_add_value_string(root, "num_err_log_entries",
4364+
uint128_t_to_string(le128_to_cpu(smart->num_err_log_entries)));
4365+
4366+
json_object_add_value_uint(root, "warning_temp_time",
4367+
le32_to_cpu(smart->warning_temp_time));
4368+
json_object_add_value_uint(root, "critical_comp_time",
4369+
le32_to_cpu(smart->critical_comp_time));
4370+
4371+
for (i = 0; i < 8; i++) {
4372+
__u16 ts = le16_to_cpu(smart->temp_sensor[i]);
4373+
char key[32];
4374+
4375+
if (ts) {
4376+
sprintf(key, "temp_sensor_%d", i + 1);
4377+
json_object_add_value_int(root, key, ts - 273);
4378+
}
4379+
}
4380+
4381+
json_object_add_value_uint(root, "thm_temp1_trans_count",
4382+
le32_to_cpu(smart->thm_temp1_trans_count));
4383+
json_object_add_value_uint(root, "thm_temp2_trans_count",
4384+
le32_to_cpu(smart->thm_temp2_trans_count));
4385+
json_object_add_value_uint(root, "thm_temp1_total_time",
4386+
le32_to_cpu(smart->thm_temp1_total_time));
4387+
json_object_add_value_uint(root, "thm_temp2_total_time",
4388+
le32_to_cpu(smart->thm_temp2_total_time));
4389+
4390+
/* Micron-specific extended fields */
4391+
json_object_add_value_uint64(root, "olec", olec);
4392+
json_object_add_value_uint(root, "ipm", ipm);
4393+
4394+
json_print_object(root, NULL);
4395+
printf("\n");
4396+
json_free_object(root);
4397+
}
4398+
4399+
static int micron_health_info(int argc, char **argv, struct command *acmd,
4400+
struct plugin *plugin)
4401+
{
4402+
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
4403+
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
4404+
const char *desc = "Retrieve SMART/Health log for Micron drives";
4405+
const char *fmt = "output format normal|json";
4406+
enum eDriveModel eModel = UNKNOWN_MODEL;
4407+
struct nvme_smart_log smart_log = { 0 };
4408+
bool is_json = false;
4409+
int err = 0;
4410+
struct format {
4411+
char *fmt;
4412+
};
4413+
struct format cfg = {
4414+
.fmt = "normal",
4415+
};
4416+
4417+
OPT_ARGS(opts) = {
4418+
OPT_FMT("format", 'f', &cfg.fmt, fmt),
4419+
OPT_END()
4420+
};
4421+
4422+
err = micron_parse_options(&ctx, &hdl, argc, argv, desc, opts, &eModel);
4423+
if (err < 0)
4424+
return err;
4425+
4426+
if (eModel == UNKNOWN_MODEL)
4427+
fprintf(stderr, "WARNING: Unknown drive model\n");
4428+
4429+
if (!strcmp(cfg.fmt, "json"))
4430+
is_json = true;
4431+
4432+
err = nvme_get_log_smart(hdl, NVME_NSID_ALL, &smart_log);
4433+
if (err) {
4434+
fprintf(stderr, "Failed to get SMART log: %s\n",
4435+
nvme_strerror(err));
4436+
return err;
4437+
}
4438+
4439+
if (is_json)
4440+
print_micron_health_log_json(&smart_log, argv[optind]);
4441+
else
4442+
print_micron_health_log_normal(&smart_log, argv[optind]);
4443+
4444+
return 0;
4445+
}
4446+
4447+
/*
 * Micron-specific Identify Controller fields.
 *   IPMSR: Interval Power Measurement Sample Rate, 2 bytes at byte 392
 *   MSMT:  Maximum Stop Measurement Time, 2 bytes at byte 394
 *   PMS:   Power Measurement Support, bit 21 of CTRATT
 *
 * IPMSR and MSMT are read out of rsvd388[]; ID_CTRL_RSVD388_OFFSET is the
 * byte offset subtracted to turn their positions into rsvd388[] indices.
 */
#define ID_CTRL_RSVD388_OFFSET	388
#define ID_CTRL_IPMSR_OFFSET	392
#define ID_CTRL_MSMT_OFFSET	394
#define CTRATT_PMS_BIT		21
4457+
4458+
static inline __u16 get_id_ctrl_ipmsr(struct nvme_id_ctrl *ctrl)
4459+
{
4460+
__u8 *p = &ctrl->rsvd388[ID_CTRL_IPMSR_OFFSET - ID_CTRL_RSVD388_OFFSET];
4461+
4462+
return le16_to_cpu(*(__le16 *)p);
4463+
}
4464+
4465+
static inline __u16 get_id_ctrl_msmt(struct nvme_id_ctrl *ctrl)
4466+
{
4467+
__u8 *p = &ctrl->rsvd388[ID_CTRL_MSMT_OFFSET - ID_CTRL_RSVD388_OFFSET];
4468+
4469+
return le16_to_cpu(*(__le16 *)p);
4470+
}
4471+
4472+
static inline bool get_id_ctrl_pms(struct nvme_id_ctrl *ctrl)
4473+
{
4474+
return (le32_to_cpu(ctrl->ctratt) >> CTRATT_PMS_BIT) & 0x1;
4475+
}
4476+
4477+
/* Micron vendor-specific id-ctrl fields display */
4478+
static void micron_id_ctrl_vs(__u8 *vs, struct json_object *root)
4479+
{
4480+
/* Cast back to get full ctrl structure for our extended fields */
4481+
struct nvme_id_ctrl *ctrl =
4482+
(struct nvme_id_ctrl *)(vs - offsetof(struct nvme_id_ctrl, vs));
4483+
__u16 ipmsr = get_id_ctrl_ipmsr(ctrl);
4484+
__u16 msmt = get_id_ctrl_msmt(ctrl);
4485+
bool pms = get_id_ctrl_pms(ctrl);
4486+
4487+
if (root) {
4488+
/* JSON output */
4489+
json_object_add_value_int(root, "pms", pms ? 1 : 0);
4490+
json_object_add_value_uint(root, "ipmsr", ipmsr);
4491+
json_object_add_value_uint(root, "msmt", msmt);
4492+
} else {
4493+
/* Normal output */
4494+
printf("pms : %u\n", pms ? 1 : 0);
4495+
printf("ipmsr : %u\n", ipmsr);
4496+
printf("msmt : %u\n", msmt);
4497+
}
4498+
}
4499+
4500+
static int micron_id_ctrl(int argc, char **argv, struct command *acmd,
4501+
struct plugin *plugin)
4502+
{
4503+
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
4504+
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
4505+
const char *desc = "Identify Controller with Micron vendor fields";
4506+
enum eDriveModel eModel = UNKNOWN_MODEL;
4507+
struct nvme_id_ctrl ctrl = { 0 };
4508+
nvme_print_flags_t flags;
4509+
int err = 0;
4510+
4511+
OPT_ARGS(opts) = {
4512+
OPT_END()
4513+
};
4514+
4515+
err = micron_parse_options(&ctx, &hdl, argc, argv, desc, opts, &eModel);
4516+
if (err < 0)
4517+
return err;
4518+
4519+
if (eModel == UNKNOWN_MODEL) {
4520+
fprintf(stderr,
4521+
"WARNING: Drive not recognized as Micron, proceeding anyway\n");
4522+
}
4523+
4524+
err = validate_output_format(nvme_cfg.output_format, &flags);
4525+
if (err < 0) {
4526+
fprintf(stderr, "Invalid output format\n");
4527+
return err;
4528+
}
4529+
4530+
err = nvme_identify_ctrl(hdl, &ctrl);
4531+
if (err) {
4532+
fprintf(stderr, "identify controller failed: %s\n",
4533+
nvme_strerror(err));
4534+
return err;
4535+
}
4536+
4537+
nvme_show_id_ctrl(&ctrl, flags, micron_id_ctrl_vs);
4538+
4539+
return 0;
4540+
}

plugins/micron/micron-nvme.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ PLUGIN(NAME("micron", "Micron vendor specific extensions", NVME_VERSION),
4646
ENTRY("vs-work-load-log", "Retrieve Workload logs", micron_work_load_log)
4747
ENTRY("vs-vendor-telemetry-log",
4848
"Retrieve Vendor Telemetry logs", micron_vendor_telemetry_log)
49+
ENTRY("smart-log", "Retrieve SMART/Health Log",
50+
micron_health_info)
51+
ENTRY("id-ctrl", "Identify Controller", micron_id_ctrl)
4952
)
5053
);
5154

0 commit comments

Comments
 (0)