Skip to content

Commit d5f69e3

Browse files
fix(core): fix opentelemetry upgrade issues (#9595)
**Description** This PR finalizes an incomplete upgrade to opentelemetry for tracing as well as introducing some important changes for effective tracing with opentelemetry. Note that the correct port for opentelemetry tracing is 4318. Previous examples showed the incorrect opencensus port of 14268, which was the old thrift protocol. Additionally, cross-alpha traces were not appearing in Jaeger. Queries requiring data from other alpha instances showed incomplete traces. Root Causes * Invalid UTF-8 in span attributes - srcFn structs contained binary data causing OTLP export failures * Wrong gRPC handlers - otelgrpc.NewClientHandler() used on servers instead of NewServerHandler() * Missing trace context extraction - Incoming gRPC requests weren't properly extracting trace context This PR also introduces a new `--trace` super flag option in which you can uniquely identify your alpha or zero service by service name. Previously all nodes shared the same name (either dgraph.alpha or dgraph.zero). This prevented understanding of how latency was a factor _across_ different cluster components. An example of setting the service name: ```yaml dgraph alpha --trace "jaeger=http://jaeger:4318; service=alpha1;" ``` This PR also emits the newer `namespace` tag for traces. Traces from Alphas and Zeros will be tagged with `dgraph.alpha` and `dgraph.zero`, respectively. These namespace selections aren't usable in the Jaeger UI, however newer tracing systems such as Grafana's Tempo have support for them. Note, for backwards compatibility, omitting the `service` trace superflag will revert tracing to the old `dgraph.alpha` service name. **Checklist** - [x] The PR title follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/#summary) syntax, leading with `fix:`, `feat:`, `chore:`, `ci:`, etc. - [x] Code compiles correctly and linting (via trunk) passes locally - [x] Tests added for new functionality, or regression tests for bug fixes added as applicable - [ ] For public APIs, new features, etc., a PR on the [docs repo](https://github.com/dgraph-io/dgraph-docs) staged and linked here. This process can be simplified by going to the [public docs site](https://docs.dgraph.io/) and clicking the "Edit this page" button at the bottom of page(s) relevant to your changes. Ensure that you indicate in the PR that this is an **unreleased** feature so that it does not get merged into the main docs prematurely.
1 parent ffab4bb commit d5f69e3

21 files changed

Lines changed: 1158 additions & 60 deletions

File tree

compose/cluster-viz.sh

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#!/bin/bash
2+
# Visualize Dgraph cluster topology from docker-compose.yml
3+
4+
FILE="${1:-docker-compose.yml}"
5+
6+
if [[ ! -f ${FILE} ]]; then
7+
echo "Usage: $0 [docker-compose.yml]"
8+
exit 1
9+
fi
10+
11+
# Flatten the file (join continuation lines) for easier parsing
12+
FLAT=$(tr '\n' ' ' <"${FILE}" | sed 's/ */ /g' || true)
13+
14+
# Extract zero nodes (find --my=zeroN:port patterns near "zero")
15+
zeros=$(echo "${FLAT}" | grep -oE '\-\-my=zero[a-zA-Z0-9_-]*:[0-9]+' | sed -E 's/--my=([^:]+):.*/\1/' | sort -u || true)
16+
17+
# Extract alpha nodes with their group assignments
18+
TMPFILE=$(mktemp)
19+
20+
# Extract all alphas by their --my flag. Group assignment is optional in flags.
21+
# If no group= is found near the alpha's command, put it under UNASSIGNED.
22+
alphas=$(echo "${FLAT}" | grep -oE '\-\-my=alpha[a-zA-Z0-9_-]*:[0-9]+' | sed -E 's/--my=([^:]+):.*/\1/' | sort -u || true)
23+
for alpha in ${alphas}; do
24+
# Look for group= near this alpha occurrence in the flattened compose.
25+
# Many compose files won't specify it; in that case group is assigned dynamically by Zero.
26+
# Use awk (portable on macOS) instead of BSD sed regex intervals.
27+
group=$(awk -v s="${FLAT}" -v a="--my=${alpha}:" '
28+
BEGIN { pos = index(s, a); if (pos == 0) exit }
29+
END {
30+
win = substr(s, pos, 600)
31+
if (match(win, /group=[0-9]+/)) {
32+
g = substr(win, RSTART, RLENGTH)
33+
split(g, parts, "=")
34+
print parts[2]
35+
}
36+
}
37+
' </dev/null || true)
38+
if [[ -z ${group} ]]; then
39+
group="UNASSIGNED"
40+
fi
41+
echo "${group} ${alpha}"
42+
done >"${TMPFILE}"
43+
44+
# Get unique groups (numeric groups first, UNASSIGNED last, never duplicated)
45+
numeric_groups=$(grep -E '^[0-9]+ ' "${TMPFILE}" | cut -d' ' -f1 | sort -n | uniq || true)
46+
unique_groups="${numeric_groups}"
47+
if grep -q '^UNASSIGNED ' "${TMPFILE}"; then
48+
unique_groups="${unique_groups} UNASSIGNED"
49+
fi
50+
51+
# Extract ratel if present
52+
has_ratel=$(grep -q 'ratel' "${FILE}" && echo "yes" || echo "no")
53+
54+
# Get replica count
55+
replicas=$(grep -oE '\-\-replicas=[0-9]+' "${FILE}" | head -1 | cut -d= -f2 || true)
56+
replicas=${replicas:-1}
57+
58+
# Calculate max alphas per group for width calculation
59+
max_per_group=0
60+
for group in ${unique_groups}; do
61+
cnt=$(grep -c "^${group} " "${TMPFILE}" 2>/dev/null || echo 0)
62+
if [[ ${cnt} -gt ${max_per_group} ]]; then
63+
max_per_group=${cnt}
64+
fi
65+
done
66+
67+
# Count zeros
68+
zero_count=$(echo "${zeros}" | wc -w | tr -d ' ' || true)
69+
70+
# Box width: 14 chars per node (┌──────────┐ + 2 spaces) + margins
71+
# Use the larger of zeros or max alphas per group
72+
max_nodes=${zero_count}
73+
if [[ ${max_per_group} -gt ${max_nodes} ]]; then
74+
max_nodes=${max_per_group}
75+
fi
76+
[[ ${max_nodes} -lt 3 ]] && max_nodes=3
77+
78+
inner_width=$((max_nodes * 14 + 4))
79+
[[ ${inner_width} -lt 60 ]] && inner_width=60
80+
outer_width=$((inner_width + 4))
81+
82+
# Total line width (display columns between outer left and right border)
83+
W=$((outer_width))
84+
85+
# Print N space characters
86+
sp() {
87+
local i
88+
for ((i = 0; i < $1; i++)); do printf " "; done
89+
}
90+
91+
# Print N copies of a character
92+
rp() {
93+
local i
94+
for ((i = 0; i < $1; i++)); do printf "%s" "$2"; done
95+
}
96+
97+
# Finish outer row: pad from current column to W, then print |
98+
fin() {
99+
sp "$((W - $1))"
100+
printf "|\n"
101+
}
102+
103+
# Finish inner row (inside group box): pad then print | |
104+
# Total should be W+1 cols: content($1) + spaces + 3(| |) = W+1
105+
fini() {
106+
sp "$((W - $1 - 2))"
107+
printf "| |\n"
108+
}
109+
110+
# Separator using full width
111+
sep() {
112+
printf "%s" "$1"
113+
rp "${W}" "$2"
114+
printf "%s\n" "$3"
115+
}
116+
117+
# Print a row that starts with '|' and does NOT yet include the final right border.
118+
# Pads with spaces so the final '|' is always aligned.
119+
outln() {
120+
local s="$1"
121+
local l=${#s}
122+
# Total line length must be W + 2 (left border + interior + right border).
123+
# If s already includes the left border '|', it must be padded to length W+1,
124+
# then we print the final right border.
125+
printf "%s" "${s}"
126+
local pad=$((W + 1 - l))
127+
if [[ ${pad} -lt 0 ]]; then
128+
pad=0
129+
fi
130+
sp "${pad}"
131+
printf "|\n"
132+
}
133+
134+
# Render a row inside the group box.
135+
# The group box top uses: "| +" + GW*"-" + "+".
136+
# For interior rows we use: "| |" + <payload> + <pad-to-GW> + "|".
137+
out_group() {
138+
local payload="$1"
139+
local plen=${#payload}
140+
local pad=$((GW - plen))
141+
if [[ ${pad} -lt 0 ]]; then
142+
pad=0
143+
fi
144+
local s="| |${payload}"
145+
# pad inside the group box up to width GW
146+
for ((i = 0; i < pad; i++)); do
147+
s+=" "
148+
done
149+
s+="|"
150+
outln "${s}"
151+
}
152+
153+
echo ""
154+
155+
# === HEADER ===
156+
sep "+" "=" "+"
157+
outln "| DGRAPH CLUSTER TOPOLOGY"
158+
sep "+" "=" "+"
159+
echo ""
160+
161+
# === ZERO LAYER ===
162+
sep "+" "-" "+"
163+
outln "| ZERO LAYER (Cluster Coordination)"
164+
sep "+" "-" "+"
165+
166+
# Zero boxes: each is 14 cols (box 12 + 2 spaces)
167+
line="| "
168+
for z in ${zeros}; do line+="+----------+ "; done
169+
outln "${line}"
170+
line="| "
171+
for z in ${zeros}; do line+=$(printf "| %-8s | " "${z}"); done
172+
outln "${line}"
173+
line="| "
174+
for z in ${zeros}; do line+="| (zero) | "; done
175+
outln "${line}"
176+
line="| "
177+
for z in ${zeros}; do line+="+----------+ "; done
178+
outln "${line}"
179+
180+
sep "+" "-" "+"
181+
echo ""
182+
183+
# === ALPHA LAYER ===
184+
sep "+" "-" "+"
185+
outln "| ALPHA LAYER (Data Storage) - Replicas: ${replicas}"
186+
sep "+" "-" "+"
187+
188+
# Group box inner width
189+
GW=$((inner_width - 2))
190+
191+
for group in ${unique_groups}; do
192+
alphas=$(grep "^${group} " "${TMPFILE}" | cut -d' ' -f2 || true)
193+
count=$(echo "${alphas}" | wc -l | tr -d ' ' || true)
194+
195+
# Empty row
196+
outln "|"
197+
198+
# Group box top
199+
line="| +"
200+
for ((i = 0; i < GW; i++)); do line+="-"; done
201+
line+="+"
202+
outln "${line}"
203+
204+
# Group title row
205+
if [[ ${group} == "UNASSIGNED" ]]; then
206+
gtitle=" GROUP UNASSIGNED (Raft Replicas: ${count})"
207+
else
208+
gtitle=" GROUP ${group} (Raft Replicas: ${count})"
209+
fi
210+
out_group "${gtitle}"
211+
212+
# Alpha boxes
213+
line=" "
214+
for a in ${alphas}; do line+="+----------+ "; done
215+
out_group "${line}"
216+
line=" "
217+
for a in ${alphas}; do line+=$(printf "| %-8s | " "${a}" || true); done
218+
out_group "${line}"
219+
line=" "
220+
for a in ${alphas}; do line+="| (alpha) | "; done
221+
out_group "${line}"
222+
line=" "
223+
for a in ${alphas}; do line+="+----------+ "; done
224+
out_group "${line}"
225+
226+
# Group box bottom
227+
line="| +"
228+
for ((i = 0; i < GW; i++)); do line+="-"; done
229+
line+="+"
230+
outln "${line}"
231+
done
232+
233+
outln "|"
234+
sep "+" "-" "+"
235+
236+
# === RATEL ===
237+
if [[ ${has_ratel} == "yes" ]]; then
238+
echo ""
239+
sep "+" "-" "+"
240+
outln "| UI LAYER"
241+
sep "+" "-" "+"
242+
outln "| +----------+"
243+
outln "| | ratel | (Web UI on :8000)"
244+
outln "| +----------+"
245+
sep "+" "-" "+"
246+
fi
247+
248+
rm -f "${TMPFILE}"
249+
250+
echo ""
251+
echo "Legend: Alphas in same GROUP replicate data via Raft consensus"
252+
echo ""

compose/compose.go

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ type options struct {
7979
DataVol bool
8080
TmpFS bool
8181
UserOwnership bool
82-
Jaeger bool
82+
Jaeger int
83+
TraceRatio string
84+
TraceService bool
8385
Metrics bool
8486
PortOffset int
8587
Verbosity int
@@ -202,8 +204,15 @@ func initService(basename string, idx, grpcPort int) service {
202204
svc.Command += fmt.Sprintf(" --cwd=/data/%s", svc.name)
203205
}
204206
svc.Command += " " + basename
205-
if opts.Jaeger {
206-
svc.Command += ` --trace "jaeger=http://jaeger:14268;"`
207+
if opts.Jaeger > 0 {
208+
traceFlag := "jaeger=http://jaeger:4318;"
209+
if opts.TraceRatio != "" {
210+
traceFlag += fmt.Sprintf(" ratio=%s;", opts.TraceRatio)
211+
}
212+
if opts.TraceService {
213+
traceFlag += fmt.Sprintf(" service=%s;", svc.name)
214+
}
215+
svc.Command += fmt.Sprintf(` --trace "%s"`, traceFlag)
207216
}
208217
return svc
209218
}
@@ -375,23 +384,44 @@ func getVolume(vol string) volume {
375384

376385
}
377386

378-
func getJaeger() service {
379-
svc := service{
380-
Image: "jaegertracing/all-in-one:1.18",
387+
func getJaeger(version int) service {
388+
if version == 2 {
389+
// Jaeger v2.x uses OpenTelemetry Collector architecture with YAML config.
390+
// OTLP receivers bind to localhost by default, so we need JAEGER_LISTEN_HOST=0.0.0.0
391+
// for container environments.
392+
return service{
393+
Image: "jaegertracing/jaeger:latest",
394+
ContainerName: containerName("jaeger"),
395+
Ports: []string{
396+
toPort(4318),
397+
toPort(16686),
398+
},
399+
Environment: []string{
400+
"JAEGER_LISTEN_HOST=0.0.0.0",
401+
},
402+
}
403+
}
404+
// Jaeger v1.60 with badger storage
405+
return service{
406+
Image: "jaegertracing/all-in-one:1.60",
381407
ContainerName: containerName("jaeger"),
382-
WorkingDir: "/working/jaeger",
408+
User: "0",
383409
Ports: []string{
384-
toPort(14268),
410+
toPort(4318),
385411
toPort(16686),
386412
},
413+
Volumes: []volume{{
414+
Type: "volume",
415+
Source: "jaeger-volume",
416+
Target: "/jaeger",
417+
}},
387418
Environment: []string{
388419
"SPAN_STORAGE_TYPE=badger",
389420
},
390421
Command: "--badger.ephemeral=false" +
391-
" --badger.directory-key /working/jaeger" +
392-
" --badger.directory-value /working/jaeger",
422+
" --badger.directory-key /jaeger" +
423+
" --badger.directory-value /jaeger",
393424
}
394-
return svc
395425
}
396426

397427
func getMinio(minioDataDir string) service {
@@ -421,12 +451,12 @@ func getRatel() service {
421451
portFlag = fmt.Sprintf(" -port=%d", opts.RatelPort)
422452
}
423453
svc := service{
424-
Image: opts.Image + ":" + opts.Tag,
454+
Image: "dgraph/ratel:latest",
425455
ContainerName: containerName("ratel"),
426456
Ports: []string{
427457
toPort(opts.RatelPort),
428458
},
429-
Command: "dgraph-ratel" + portFlag,
459+
Command: portFlag,
430460
}
431461
return svc
432462
}
@@ -543,8 +573,13 @@ func main() {
543573
"run as the current user rather than root")
544574
cmd.PersistentFlags().BoolVar(&opts.TmpFS, "tmpfs", false,
545575
"store w and zw directories on a tmpfs filesystem")
546-
cmd.PersistentFlags().BoolVarP(&opts.Jaeger, "jaeger", "j", false,
547-
"include jaeger service")
576+
cmd.PersistentFlags().IntVarP(&opts.Jaeger, "jaeger", "j", 0,
577+
"include jaeger service (1 for v1.60, 2 for v2.x)")
578+
cmd.PersistentFlags().Lookup("jaeger").NoOptDefVal = "1"
579+
cmd.PersistentFlags().StringVar(&opts.TraceRatio, "trace_ratio", "",
580+
"ratio of queries to trace (e.g., 0.01 for 1%)")
581+
cmd.PersistentFlags().BoolVar(&opts.TraceService, "trace_service", false,
582+
"use compose service name as trace service name (e.g., alpha1, zero1)")
548583
cmd.PersistentFlags().BoolVarP(&opts.Metrics, "metrics", "m", false,
549584
"include metrics (prometheus, grafana) services")
550585
cmd.PersistentFlags().IntVarP(&opts.PortOffset, "port_offset", "o", 100,
@@ -689,8 +724,11 @@ func main() {
689724
cfg.Volumes["data"] = stringMap{}
690725
}
691726

692-
if opts.Jaeger {
693-
services["jaeger"] = getJaeger()
727+
if opts.Jaeger > 0 {
728+
services["jaeger"] = getJaeger(opts.Jaeger)
729+
if opts.Jaeger == 1 {
730+
cfg.Volumes["jaeger-volume"] = stringMap{}
731+
}
694732
}
695733

696734
if opts.Ratel {

contrib/jepsen/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ var (
116116
testCount = pflag.IntP("test-count", "c", 1, "Test count per Jepsen test.")
117117
jaeger = pflag.StringP("jaeger", "j", "",
118118
"Run with Jaeger collector. Set to empty string to disable collection to Jaeger."+
119-
" Otherwise set to http://jaeger:14268.")
119+
" Otherwise set to http://jaeger:4318.")
120120
jaegerSaveTraces = pflag.Bool("jaeger-save-traces", true, "Save Jaeger traces on test error.")
121121
deferDbTeardown = pflag.Bool("defer-db-teardown", false,
122122
"Wait until user input to tear down DB nodes")

0 commit comments

Comments
 (0)