Skip to content

Commit 583958d

Browse files
YousefSalamameta-codesync[bot]
authored andcommitted
admin: add inspect subcommand to derivation-queue
Summary: Add an `inspect` subcommand to `monad derivation-queue` that shows the Zelos DAG state for a specific queue item. This helps debug stuck or broken items by showing: - Whether the `needed` node exists and its metadata (retry count, priority, enqueue time, head changeset) - Whether the item is in the `ready` or `deriving` state - Forward dependencies and whether each dependency's `needed` node still exists (marks broken deps) - Reverse dependencies (items blocked on this one) Reviewed By: lmvasquezg Differential Revision: D103875695 fbshipit-source-id: d25eaffe042b896a0e838aacfc7404f9cbce2dff
1 parent 4c09d3f commit 583958d

3 files changed

Lines changed: 166 additions & 0 deletions

File tree

eden/mononoke/repo_attributes/repo_derivation_queues/src/lib.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,28 @@ pub use crate::dag_items::derivation_priority_to_str;
4949
pub use crate::errors::InternalError;
5050
pub use crate::underived::build_underived_batched_graph;
5151

52+
/// Status of a single dependency: suffix and whether its `needed` node exists.
53+
pub struct DepStatus {
54+
pub suffix: String,
55+
pub needed_exists: bool,
56+
}
57+
58+
/// Whether a queue item is in the ready state, and which priority queue.
59+
pub enum ReadyState {
60+
NotReady,
61+
ReadyHighPri,
62+
ReadyLowPri,
63+
}
64+
65+
/// Result of inspecting a specific DAG item in the derivation queue.
66+
pub struct InspectResult {
67+
pub needed: Option<DagItemInfo>,
68+
pub ready_state: ReadyState,
69+
pub is_deriving: bool,
70+
pub forward_deps: Vec<DepStatus>,
71+
pub reverse_deps: Vec<DepStatus>,
72+
}
73+
5274
/// Lightweight item returned from dequeue — just the ID and priority.
5375
/// The full DerivationDagItem is constructed during claim_derivation
5476
/// after fetching data from Zeus.
@@ -155,6 +177,14 @@ pub trait DerivationQueue {
155177

156178
async fn summary(&self, ctx: &CoreContext) -> Result<DerivationQueueSummary, InternalError>;
157179

180+
/// Inspect the Zelos DAG state of a specific item, including its node
181+
/// states, forward dependencies, and reverse dependencies.
182+
async fn inspect(
183+
&self,
184+
ctx: &CoreContext,
185+
item_id: DagItemId,
186+
) -> Result<InspectResult, InternalError>;
187+
158188
fn derived_data_manager(&self) -> &DerivedDataManager;
159189

160190
fn repo_id(&self) -> RepositoryId;

eden/mononoke/tools/admin/src/commands/derivation_queue.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77

88
mod enqueue;
9+
mod inspect;
910
mod summary;
1011
mod unsafe_evict;
1112

@@ -18,6 +19,7 @@ use bookmarks::Bookmarks;
1819
use clap::Parser;
1920
use clap::Subcommand;
2021
use enqueue::EnqueueArgs;
22+
use inspect::InspectArgs;
2123
use metaconfig_types::RepoConfig;
2224
use metaconfig_types::RepoConfigRef;
2325
use mononoke_app::MononokeApp;
@@ -49,6 +51,8 @@ pub enum DerivationQueueSubcommand {
4951
Summary(SummaryArgs),
5052
/// Evict an item (referenced by root cs_id and derived data type) from the derivation queue. WARNING: can leave dependent items in the queue stuck
5153
UnsafeEvict(UnsafeEvictArgs),
54+
/// Inspect the Zelos DAG state of a specific item in the derivation queue
55+
Inspect(InspectArgs),
5256
}
5357

5458
#[facet::container]
@@ -97,5 +101,8 @@ pub async fn run(app: MononokeApp, args: CommandArgs) -> Result<()> {
97101
DerivationQueueSubcommand::UnsafeEvict(args) => {
98102
unsafe_evict::unsafe_evict(&ctx, &repo, config_name, args).await
99103
}
104+
DerivationQueueSubcommand::Inspect(args) => {
105+
inspect::inspect(&ctx, &repo, config_name, args).await
106+
}
100107
}
101108
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This software may be used and distributed according to the terms of the
5+
* GNU General Public License version 2.
6+
*/
7+
8+
use anyhow::Result;
9+
use anyhow::anyhow;
10+
use clap::Args;
11+
use context::CoreContext;
12+
use mononoke_app::args::ChangesetArgs;
13+
use mononoke_app::args::DerivedDataArgs;
14+
use repo_derivation_queues::DagItemId;
15+
use repo_derivation_queues::ReadyState;
16+
use repo_derivation_queues::RepoDerivationQueuesRef;
17+
use repo_derivation_queues::derivation_priority_to_str;
18+
use repo_identity::RepoIdentityRef;
19+
20+
use super::Repo;
21+
22+
#[derive(Args)]
23+
pub struct InspectArgs {
24+
#[clap(flatten)]
25+
changeset_args: ChangesetArgs,
26+
27+
#[clap(flatten)]
28+
derived_data_args: DerivedDataArgs,
29+
30+
/// Stage ID for staged derivation items
31+
#[clap(long)]
32+
stage_id: Option<String>,
33+
}
34+
35+
pub async fn inspect(
36+
ctx: &CoreContext,
37+
repo: &Repo,
38+
config_name: &str,
39+
args: InspectArgs,
40+
) -> Result<()> {
41+
let derivation_queue = repo
42+
.repo_derivation_queues()
43+
.queue(config_name)
44+
.ok_or_else(|| anyhow!("Missing derivation queue for config {}", config_name))?;
45+
46+
let derived_data_type = args.derived_data_args.resolve_type()?;
47+
let cs_ids = args.changeset_args.resolve_changesets(ctx, repo).await?;
48+
49+
for cs_id in cs_ids {
50+
let item_id = DagItemId::new(
51+
repo.repo_identity().id(),
52+
config_name.to_string(),
53+
derived_data_type,
54+
cs_id,
55+
args.stage_id.clone(),
56+
);
57+
58+
println!(
59+
"Item: {:?}/{}/{}",
60+
derived_data_type,
61+
args.stage_id.as_deref().unwrap_or("(no stage)"),
62+
cs_id
63+
);
64+
65+
let result = derivation_queue.inspect(ctx, item_id).await?;
66+
67+
match &result.needed {
68+
Some(info) => {
69+
let ts = info
70+
.enqueue_timestamp()
71+
.map(|t| format!("{}s{}ms ago", t.since_seconds(), t.since_millis() % 1000))
72+
.unwrap_or_else(|| "unknown".to_string());
73+
println!(
74+
" needed: EXISTS (retry_count={}, priority={}, enqueued {})",
75+
info.retry_count(),
76+
derivation_priority_to_str(info.priority()),
77+
ts
78+
);
79+
println!(" head: {}", info.head_cs_id());
80+
}
81+
None => println!(" needed: MISSING"),
82+
}
83+
84+
let ready_str = match result.ready_state {
85+
ReadyState::NotReady => "no",
86+
ReadyState::ReadyHighPri => "YES (high priority)",
87+
ReadyState::ReadyLowPri => "YES (low priority)",
88+
};
89+
println!(" ready: {}", ready_str);
90+
println!(
91+
" deriving: {}",
92+
if result.is_deriving { "YES" } else { "no" }
93+
);
94+
95+
if result.forward_deps.is_empty() {
96+
println!(" forward deps: (none)");
97+
} else {
98+
println!(" forward deps ({}):", result.forward_deps.len());
99+
for dep in &result.forward_deps {
100+
let status = if dep.needed_exists {
101+
"EXISTS"
102+
} else {
103+
"MISSING"
104+
};
105+
let marker = if !dep.needed_exists { " <- BROKEN" } else { "" };
106+
println!(" {} -> needed: {}{}", dep.suffix, status, marker);
107+
}
108+
}
109+
110+
if result.reverse_deps.is_empty() {
111+
println!(" reverse deps: (none)");
112+
} else {
113+
println!(" reverse deps ({}):", result.reverse_deps.len());
114+
for dep in &result.reverse_deps {
115+
let status = if dep.needed_exists {
116+
"EXISTS"
117+
} else {
118+
"MISSING"
119+
};
120+
let marker = if !dep.needed_exists { " <- BROKEN" } else { "" };
121+
println!(" {} -> needed: {}{}", dep.suffix, status, marker);
122+
}
123+
}
124+
125+
println!();
126+
}
127+
128+
Ok(())
129+
}

0 commit comments

Comments
 (0)