Skip to content

Commit c811732

Browse files
committed
micron: add smart-log and id-ctrl commands
Add smart-log command to retrieve SMART/Health log with vendor-specific OLEC and IPM fields. Add id-ctrl command to display Identify Controller with vendor-specific PMS, IPMSR, and MSMT fields. Signed-off-by: Swapna Samoju <[email protected]>
1 parent 82f2aad commit c811732

2 files changed

Lines changed: 333 additions & 0 deletions

File tree

plugins/micron/micron-nvme.c

Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <string.h>
1414
#include <ctype.h>
1515
#include <stdlib.h>
16+
#include <inttypes.h>
1617
#include <errno.h>
1718
#include <fcntl.h>
1819
#include <unistd.h>
@@ -30,6 +31,7 @@
3031
#include "nvme-print.h"
3132
#include "util/cleanup.h"
3233
#include "util/utils.h"
34+
#include "util/types.h"
3335

3436
#define CREATE_CMD
3537
#include "micron-nvme.h"
@@ -4178,3 +4180,331 @@ static int micron_cloud_log(int argc, char **argv, struct command *acmd,
41784180
nvme_show_status(err);
41794181
return err;
41804182
}
4183+
4184+
/* Extended SMART log structure with Micron-specific fields in reserved area */
4185+
struct micron_smart_log_ext {
4186+
struct nvme_smart_log base;
4187+
/* Access vendor-specific fields via rsvd232 overlay */
4188+
};
4189+
4190+
/*
4191+
* OLEC: bytes 232-239 (rsvd232[0:7])
4192+
* IPM: bytes 240-243 (rsvd232[8:11])
4193+
*/
4194+
#define SMART_OLEC_OFFSET 0
4195+
#define SMART_IPM_OFFSET 8
4196+
4197+
static inline __u64 get_smart_olec(struct nvme_smart_log *smart)
4198+
{
4199+
return le64_to_cpu(*(__le64 *)&smart->rsvd232[SMART_OLEC_OFFSET]);
4200+
}
4201+
4202+
static inline __u32 get_smart_ipm(struct nvme_smart_log *smart)
4203+
{
4204+
return le32_to_cpu(*(__le32 *)&smart->rsvd232[SMART_IPM_OFFSET]);
4205+
}
4206+
4207+
static void print_micron_health_log_normal(struct nvme_smart_log *smart,
4208+
const char *devname)
4209+
{
4210+
__u16 temp = smart->temperature[1] << 8 | smart->temperature[0];
4211+
__u64 olec = get_smart_olec(smart);
4212+
__u32 ipm = get_smart_ipm(smart);
4213+
int i;
4214+
4215+
printf("SMART/Health Information Log for %s\n", devname);
4216+
printf("========================================\n");
4217+
4218+
printf("Critical Warning : 0x%02x\n",
4219+
smart->critical_warning);
4220+
if (smart->critical_warning) {
4221+
if (smart->critical_warning & 0x01)
4222+
printf(" - Available spare below threshold\n");
4223+
if (smart->critical_warning & 0x02)
4224+
printf(" - Temperature threshold exceeded\n");
4225+
if (smart->critical_warning & 0x04)
4226+
printf(" - NVM subsystem reliability degraded\n");
4227+
if (smart->critical_warning & 0x08)
4228+
printf(" - Media placed in read-only mode\n");
4229+
if (smart->critical_warning & 0x10)
4230+
printf(" - Volatile memory backup failed\n");
4231+
if (smart->critical_warning & 0x20)
4232+
printf(" - PMR read-only or unreliable\n");
4233+
}
4234+
4235+
printf("Composite Temperature : %u K (%d C)\n",
4236+
temp, temp ? temp - 273 : 0);
4237+
printf("Available Spare : %u%%\n", smart->avail_spare);
4238+
printf("Available Spare Threshold : %u%%\n", smart->spare_thresh);
4239+
printf("Percentage Used : %u%%\n", smart->percent_used);
4240+
printf("Endurance Grp Critical Warn : 0x%02x\n",
4241+
smart->endu_grp_crit_warn_sumry);
4242+
4243+
printf("Data Units Read : %s\n",
4244+
uint128_t_to_string(le128_to_cpu(smart->data_units_read)));
4245+
printf("Data Units Written : %s\n",
4246+
uint128_t_to_string(le128_to_cpu(smart->data_units_written)));
4247+
printf("Host Read Commands : %s\n",
4248+
uint128_t_to_string(le128_to_cpu(smart->host_reads)));
4249+
printf("Host Write Commands : %s\n",
4250+
uint128_t_to_string(le128_to_cpu(smart->host_writes)));
4251+
printf("Controller Busy Time : %s min\n",
4252+
uint128_t_to_string(le128_to_cpu(smart->ctrl_busy_time)));
4253+
printf("Power Cycles : %s\n",
4254+
uint128_t_to_string(le128_to_cpu(smart->power_cycles)));
4255+
printf("Power On Hours : %s\n",
4256+
uint128_t_to_string(le128_to_cpu(smart->power_on_hours)));
4257+
printf("Unsafe Shutdowns : %s\n",
4258+
uint128_t_to_string(le128_to_cpu(smart->unsafe_shutdowns)));
4259+
printf("Media Errors : %s\n",
4260+
uint128_t_to_string(le128_to_cpu(smart->media_errors)));
4261+
printf("Num Error Log Entries : %s\n",
4262+
uint128_t_to_string(le128_to_cpu(smart->num_err_log_entries)));
4263+
4264+
printf("Warning Comp Temp Time : %u min\n",
4265+
le32_to_cpu(smart->warning_temp_time));
4266+
printf("Critical Comp Temp Time : %u min\n",
4267+
le32_to_cpu(smart->critical_comp_time));
4268+
4269+
for (i = 0; i < 8; i++) {
4270+
__u16 ts = le16_to_cpu(smart->temp_sensor[i]);
4271+
4272+
if (ts)
4273+
printf("Temperature Sensor %d : %u K (%d C)\n",
4274+
i + 1, ts, ts - 273);
4275+
}
4276+
4277+
printf("Thm Temp 1 Trans Count : %u\n",
4278+
le32_to_cpu(smart->thm_temp1_trans_count));
4279+
printf("Thm Temp 2 Trans Count : %u\n",
4280+
le32_to_cpu(smart->thm_temp2_trans_count));
4281+
printf("Thm Temp 1 Total Time : %u sec\n",
4282+
le32_to_cpu(smart->thm_temp1_total_time));
4283+
printf("Thm Temp 2 Total Time : %u sec\n",
4284+
le32_to_cpu(smart->thm_temp2_total_time));
4285+
4286+
/* Micron-specific extended fields */
4287+
printf("OLEC (Energy) : %llu\n",
4288+
(unsigned long long)olec);
4289+
printf("Interval Power Measurement : %u\n", ipm);
4290+
}
4291+
4292+
static void print_micron_health_log_json(struct nvme_smart_log *smart,
4293+
const char *devname)
4294+
{
4295+
__u16 temp = smart->temperature[1] << 8 | smart->temperature[0];
4296+
__u64 olec = get_smart_olec(smart);
4297+
__u32 ipm = get_smart_ipm(smart);
4298+
struct json_object *root;
4299+
int i;
4300+
4301+
root = json_create_object();
4302+
4303+
json_object_add_value_string(root, "device", devname);
4304+
json_object_add_value_int(root, "critical_warning",
4305+
smart->critical_warning);
4306+
json_object_add_value_int(root, "temperature_kelvin", temp);
4307+
json_object_add_value_int(root, "temperature_celsius",
4308+
temp ? temp - 273 : 0);
4309+
json_object_add_value_int(root, "avail_spare", smart->avail_spare);
4310+
json_object_add_value_int(root, "spare_thresh", smart->spare_thresh);
4311+
json_object_add_value_int(root, "percent_used", smart->percent_used);
4312+
json_object_add_value_int(root, "endurance_grp_crit_warn",
4313+
smart->endu_grp_crit_warn_sumry);
4314+
4315+
json_object_add_value_string(root, "data_units_read",
4316+
uint128_t_to_string(le128_to_cpu(smart->data_units_read)));
4317+
json_object_add_value_string(root, "data_units_written",
4318+
uint128_t_to_string(le128_to_cpu(smart->data_units_written)));
4319+
json_object_add_value_string(root, "host_reads",
4320+
uint128_t_to_string(le128_to_cpu(smart->host_reads)));
4321+
json_object_add_value_string(root, "host_writes",
4322+
uint128_t_to_string(le128_to_cpu(smart->host_writes)));
4323+
json_object_add_value_string(root, "ctrl_busy_time",
4324+
uint128_t_to_string(le128_to_cpu(smart->ctrl_busy_time)));
4325+
json_object_add_value_string(root, "power_cycles",
4326+
uint128_t_to_string(le128_to_cpu(smart->power_cycles)));
4327+
json_object_add_value_string(root, "power_on_hours",
4328+
uint128_t_to_string(le128_to_cpu(smart->power_on_hours)));
4329+
json_object_add_value_string(root, "unsafe_shutdowns",
4330+
uint128_t_to_string(le128_to_cpu(smart->unsafe_shutdowns)));
4331+
json_object_add_value_string(root, "media_errors",
4332+
uint128_t_to_string(le128_to_cpu(smart->media_errors)));
4333+
json_object_add_value_string(root, "num_err_log_entries",
4334+
uint128_t_to_string(le128_to_cpu(smart->num_err_log_entries)));
4335+
4336+
json_object_add_value_uint(root, "warning_temp_time",
4337+
le32_to_cpu(smart->warning_temp_time));
4338+
json_object_add_value_uint(root, "critical_comp_time",
4339+
le32_to_cpu(smart->critical_comp_time));
4340+
4341+
for (i = 0; i < 8; i++) {
4342+
__u16 ts = le16_to_cpu(smart->temp_sensor[i]);
4343+
char key[32];
4344+
4345+
if (ts) {
4346+
sprintf(key, "temp_sensor_%d", i + 1);
4347+
json_object_add_value_int(root, key, ts - 273);
4348+
}
4349+
}
4350+
4351+
json_object_add_value_uint(root, "thm_temp1_trans_count",
4352+
le32_to_cpu(smart->thm_temp1_trans_count));
4353+
json_object_add_value_uint(root, "thm_temp2_trans_count",
4354+
le32_to_cpu(smart->thm_temp2_trans_count));
4355+
json_object_add_value_uint(root, "thm_temp1_total_time",
4356+
le32_to_cpu(smart->thm_temp1_total_time));
4357+
json_object_add_value_uint(root, "thm_temp2_total_time",
4358+
le32_to_cpu(smart->thm_temp2_total_time));
4359+
4360+
/* Micron-specific extended fields */
4361+
json_object_add_value_uint64(root, "olec", olec);
4362+
json_object_add_value_uint(root, "ipm", ipm);
4363+
4364+
json_print_object(root, NULL);
4365+
printf("\n");
4366+
json_free_object(root);
4367+
}
4368+
4369+
static int micron_health_info(int argc, char **argv, struct command *acmd,
4370+
struct plugin *plugin)
4371+
{
4372+
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
4373+
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
4374+
const char *desc = "Retrieve SMART/Health log for Micron drives";
4375+
const char *fmt = "output format normal|json";
4376+
enum eDriveModel eModel = UNKNOWN_MODEL;
4377+
struct nvme_smart_log smart_log = { 0 };
4378+
bool is_json = false;
4379+
int err = 0;
4380+
struct format {
4381+
char *fmt;
4382+
};
4383+
struct format cfg = {
4384+
.fmt = "normal",
4385+
};
4386+
4387+
OPT_ARGS(opts) = {
4388+
OPT_FMT("format", 'f', &cfg.fmt, fmt),
4389+
OPT_END()
4390+
};
4391+
4392+
err = micron_parse_options(&ctx, &hdl, argc, argv, desc, opts, &eModel);
4393+
if (err < 0)
4394+
return err;
4395+
4396+
if (eModel == UNKNOWN_MODEL)
4397+
fprintf(stderr, "WARNING: Unknown drive model\n");
4398+
4399+
if (!strcmp(cfg.fmt, "json"))
4400+
is_json = true;
4401+
4402+
err = nvme_get_log_smart(hdl, NVME_NSID_ALL, &smart_log);
4403+
if (err) {
4404+
fprintf(stderr, "Failed to get SMART log: %s\n",
4405+
nvme_strerror(err));
4406+
return err;
4407+
}
4408+
4409+
if (is_json)
4410+
print_micron_health_log_json(&smart_log, argv[optind]);
4411+
else
4412+
print_micron_health_log_normal(&smart_log, argv[optind]);
4413+
4414+
return 0;
4415+
}
4416+
4417+
/*
4418+
* Identify Controller field offsets for Micron-specific fields
4419+
* IPMSR: Interval Power Measurement Sample Rate (2 bytes)
4420+
* MSMT: Maximum Stop Measurement Time (2 bytes)
4421+
* PMS: Power Measurement Support - bit 21 of CTRATT
4422+
*/
4423+
#define ID_CTRL_RSVD388_OFFSET 388
4424+
#define ID_CTRL_IPMSR_OFFSET 392
4425+
#define ID_CTRL_MSMT_OFFSET 394
4426+
#define CTRATT_PMS_BIT 21
4427+
4428+
static inline __u16 get_id_ctrl_ipmsr(struct nvme_id_ctrl *ctrl)
4429+
{
4430+
__u8 *p = &ctrl->rsvd388[ID_CTRL_IPMSR_OFFSET - ID_CTRL_RSVD388_OFFSET];
4431+
4432+
return le16_to_cpu(*(__le16 *)p);
4433+
}
4434+
4435+
static inline __u16 get_id_ctrl_msmt(struct nvme_id_ctrl *ctrl)
4436+
{
4437+
__u8 *p = &ctrl->rsvd388[ID_CTRL_MSMT_OFFSET - ID_CTRL_RSVD388_OFFSET];
4438+
4439+
return le16_to_cpu(*(__le16 *)p);
4440+
}
4441+
4442+
static inline bool get_id_ctrl_pms(struct nvme_id_ctrl *ctrl)
4443+
{
4444+
return (le32_to_cpu(ctrl->ctratt) >> CTRATT_PMS_BIT) & 0x1;
4445+
}
4446+
4447+
/* Micron vendor-specific id-ctrl fields display */
4448+
static void micron_id_ctrl_vs(__u8 *vs, struct json_object *root)
4449+
{
4450+
/* Cast back to get full ctrl structure for our extended fields */
4451+
struct nvme_id_ctrl *ctrl =
4452+
(struct nvme_id_ctrl *)(vs - offsetof(struct nvme_id_ctrl, vs));
4453+
__u16 ipmsr = get_id_ctrl_ipmsr(ctrl);
4454+
__u16 msmt = get_id_ctrl_msmt(ctrl);
4455+
bool pms = get_id_ctrl_pms(ctrl);
4456+
4457+
if (root) {
4458+
/* JSON output */
4459+
json_object_add_value_int(root, "pms", pms ? 1 : 0);
4460+
json_object_add_value_uint(root, "ipmsr", ipmsr);
4461+
json_object_add_value_uint(root, "msmt", msmt);
4462+
} else {
4463+
/* Normal output */
4464+
printf("pms : %u\n", pms ? 1 : 0);
4465+
printf("ipmsr : %u\n", ipmsr);
4466+
printf("msmt : %u\n", msmt);
4467+
}
4468+
}
4469+
4470+
static int micron_id_ctrl(int argc, char **argv, struct command *acmd,
4471+
struct plugin *plugin)
4472+
{
4473+
_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
4474+
_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
4475+
const char *desc = "Identify Controller with Micron vendor fields";
4476+
enum eDriveModel eModel = UNKNOWN_MODEL;
4477+
struct nvme_id_ctrl ctrl = { 0 };
4478+
nvme_print_flags_t flags;
4479+
int err = 0;
4480+
4481+
OPT_ARGS(opts) = {
4482+
OPT_END()
4483+
};
4484+
4485+
err = micron_parse_options(&ctx, &hdl, argc, argv, desc, opts, &eModel);
4486+
if (err < 0)
4487+
return err;
4488+
4489+
if (eModel == UNKNOWN_MODEL) {
4490+
fprintf(stderr,
4491+
"WARNING: Drive not recognized as Micron, proceeding anyway\n");
4492+
}
4493+
4494+
err = validate_output_format(nvme_cfg.output_format, &flags);
4495+
if (err < 0) {
4496+
fprintf(stderr, "Invalid output format\n");
4497+
return err;
4498+
}
4499+
4500+
err = nvme_identify_ctrl(hdl, &ctrl);
4501+
if (err) {
4502+
fprintf(stderr, "identify controller failed: %s\n",
4503+
nvme_strerror(err));
4504+
return err;
4505+
}
4506+
4507+
nvme_show_id_ctrl(&ctrl, flags, micron_id_ctrl_vs);
4508+
4509+
return 0;
4510+
}

plugins/micron/micron-nvme.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ PLUGIN(NAME("micron", "Micron vendor specific extensions", NVME_VERSION),
4646
ENTRY("vs-work-load-log", "Retrieve Workload logs", micron_work_load_log)
4747
ENTRY("vs-vendor-telemetry-log",
4848
"Retrieve Vendor Telemetry logs", micron_vendor_telemetry_log)
49+
ENTRY("smart-log", "Retrieve SMART/Health Log",
50+
micron_health_info)
51+
ENTRY("id-ctrl", "Identify Controller", micron_id_ctrl)
4952
)
5053
);
5154

0 commit comments

Comments
 (0)