diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Dimension.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Dimension.java
index 6ad068471..7caece7a0 100644
--- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Dimension.java
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Dimension.java
@@ -19,13 +19,13 @@
 import java.util.function.Supplier;
 
 /**
- * A layer of the {@code Term} normalization stack, in increasing order of aggressiveness. A
- * {@code TermAnalyzer} applies a configured prefix of these to each token; the declaration order is
+ * A layer of the {@link Term} normalization stack, in increasing order of aggressiveness. A
+ * {@link TermAnalyzer} applies a configured prefix of these to each token; the declaration order is
  * the canonical pipeline order, because the transforms do not commute (case folding then accent
  * folding differs from the reverse for Turkish dotted/dotless i and the German eszett).
  *
  * <p>This enum is the single definition of the character-level steps: each one carries its default
- * {@link CharSequenceNormalizer}, which both {@code TermAnalyzer} and {@link TextNormalizer} read
+ * {@link CharSequenceNormalizer}, which both {@link TermAnalyzer} and {@link TextNormalizer} read
  * from rather than re-listing. The default is resolved lazily, so loading this enum does not eagerly
  * initialize heavy data such as the confusables table.</p>
  *
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Term.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Term.java
new file mode 100644
index 000000000..eda3c4107
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/Term.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.util.normalizer;
+
+import java.util.EnumMap;
+import java.util.List;
+
+import opennlp.tools.util.Span;
+
+/**
+ * One token as a stack of normalization layers. The {@link #original()} form is the canonical
+ * source of truth; the other layers are derived, increasingly aggressive {@link Dimension}s tuned
+ * for matching and search. The dimensions configured on the producing {@link TermAnalyzer} are
+ * computed eagerly and cached; any other dimension is computed on first request, applied on top of
+ * the {@link #normalized() configured form}, and then cached.
+ *
+ * <p>Because the original is always retained, aggressive folding is safe: a match on a derived layer
+ * can always be reported in original coordinates through {@link #span()}. Querying a configured
+ * layer, or {@link #peel() peeling} the last-applied one, is O(1); adding an unconfigured dimension
+ * costs one transform on first touch and is O(1) thereafter.</p>
+ *
+ * <p>Instances are created by {@link TermAnalyzer} and are not thread-safe (the lazy cache is
+ * mutated on first access of an unconfigured dimension).</p>
+ */
+public final class Term {
+
+  private final TermAnalyzer analyzer;
+  private final Span span;
+  private final String posTag;
+  private final EnumMap<Dimension, String> layers = new EnumMap<>(Dimension.class);
+
+  Term(TermAnalyzer analyzer, String original, Span span, String posTag) {
+    this.analyzer = analyzer;
+    this.span = span;
+    this.posTag = posTag;
+    layers.put(Dimension.ORIGINAL, original);
+    String current = original; // each configured layer is derived from the one before it
+    for (final Dimension layer : analyzer.dimensions()) {
+      current = analyzer.apply(layer, current, posTag);
+      layers.put(layer, current);
+    }
+  }
+
+  /**
+   * {@return the source span of this token, or {@code null} if it was supplied as a pre-tokenized
+   * string} The span indexes into the text passed to {@link TermAnalyzer#analyze(CharSequence)}.
+   */
+  public Span span() {
+    return span;
+  }
+
+  /**
+   * {@return the original token text}
+   */
+  public String original() {
+    return layers.get(Dimension.ORIGINAL);
+  }
+
+  /**
+   * {@return the token at the analyzer's final configured dimension} Equal to {@link #original()}
+   * when no dimensions were configured.
+   */
+  public String normalized() {
+    return at(analyzer.finalDimension());
+  }
+
+  /**
+   * Returns the token at {@code dimension}. Configured dimensions are cached; an unconfigured
+   * dimension is computed by applying its transform to {@link #normalized()} and then cached.
+   *
+   * <p>Note: an unconfigured dimension is applied on top of {@link #normalized()} (the most
+   * aggressive configured layer), not spliced into canonical pipeline order. Because the transforms
+   * do not commute (see {@link Dimension}), requesting a dimension that ranks <em>earlier</em> than
+   * the configured ones can differ from having configured it. For example, asking for
+   * {@link Dimension#CASE_FOLD} on an analyzer configured only through {@link Dimension#ACCENT_FOLD}
+   * case-folds the already accent-folded text, which is not the same as case-folding first.
+   * Configure the dimension on the analyzer when canonical order matters.</p>
+   *
+   * @param dimension The dimension to project to.
+   * @return The token at that dimension.
+   * @throws IllegalStateException if the dimension needs an engine or tag that was not configured
+   *     (see {@link Dimension#STEM} and {@link Dimension#LEMMA}).
+   */
+  public String at(Dimension dimension) {
+    // Presence check: a final layer cached as null would otherwise recurse via normalized().
+    if (layers.containsKey(dimension)) {
+      return layers.get(dimension);
+    }
+    final String value = analyzer.apply(dimension, normalized(), posTag);
+    layers.put(dimension, value);
+    return value;
+  }
+
+  /**
+   * {@return the token at the dimension just below the final configured one} This is the
+   * last-applied layer removed (for example the form before stemming when {@link Dimension#STEM}
+   * is the final dimension); equal to {@link #original()} when at most one dimension is configured.
+   */
+  public String peel() {
+    final List<Dimension> configured = analyzer.dimensions();
+    // Index of the layer directly beneath the final one; negative when fewer than two
+    // dimensions are configured, in which case only the original is left underneath.
+    final int belowFinal = configured.size() - 2;
+    return belowFinal < 0 ? original() : at(configured.get(belowFinal));
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java
new file mode 100644
index 000000000..0d9956e8e
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.util.normalizer;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+import opennlp.tools.lemmatizer.Lemmatizer;
+import opennlp.tools.stemmer.Stemmer;
+import opennlp.tools.tokenize.uax29.WordTokenizer;
+import opennlp.tools.util.Span;
+
+/**
+ * Builds {@link Term}s by segmenting text and applying a configured stack of normalization
+ * {@link Dimension}s to each token. The analyzer is the configuration; each {@link Term} is the
+ * layered result for one token, with the configured dimensions computed eagerly and any other
+ * dimension computed lazily on first request.
+ *
+ * <p>Segmentation uses the Unicode {@linkplain WordTokenizer UAX&#160;#29 word tokenizer}, so the
+ * input does not need to be pre-tokenized. The character-level dimensions ({@link Dimension#NFC}
+ * through {@link Dimension#ACCENT_FOLD}) have built-in defaults; {@link Dimension#STEM} and
+ * {@link Dimension#LEMMA} are enabled by supplying a {@link Stemmer} or {@link Lemmatizer}.</p>
+ *
+ * <p>An instance is immutable and is thread-safe when its configured transforms are. The built-in
+ * character normalizers are stateless, but the Snowball stemmers are not, so an analyzer configured
+ * with a {@link Stemmer} (for example through {@link NormalizationProfile#searchAnalyzer()}) should
+ * not be shared across threads when {@link Dimension#STEM} is used. Build one with
+ * {@link #builder()}.</p>
+ */
+public final class TermAnalyzer {
+
+  private final List<Dimension> chain;
+  private final Dimension finalDimension;
+  private final EnumMap<Dimension, CharSequenceNormalizer> transforms;
+  private final Stemmer stemmer;
+  private final Lemmatizer lemmatizer;
+  private final WordTokenizer tokenizer;
+
+  private TermAnalyzer(Builder builder) {
+    final List<Dimension> ordered = new ArrayList<>(builder.chain);
+    Collections.sort(ordered); // canonical pipeline order (enum declaration order)
+    this.chain = List.copyOf(ordered);
+    this.finalDimension = ordered.isEmpty() ? Dimension.ORIGINAL : ordered.get(ordered.size() - 1);
+    // Only the per-analyzer overrides from the builder; the defaults live on Dimension itself.
+    this.transforms = new EnumMap<>(builder.transforms);
+    this.stemmer = builder.stemmer;
+    this.lemmatizer = builder.lemmatizer;
+    this.tokenizer = builder.tokenizer;
+  }
+
+  /**
+   * {@return a new builder}
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Segments {@code text} with the UAX&#160;#29 word tokenizer and returns one {@link Term} per
+   * word token, in order. The terms carry no part-of-speech tag, so an analyzer that enables
+   * {@link Dimension#LEMMA} throws {@link IllegalStateException} here on the first token.
+   *
+   * @param text The text to analyze.
+   * @return The terms, each carrying its source span.
+   */
+  public List<Term> analyze(CharSequence text) {
+    final List<Span> spans = tokenizer.tokenizeSpans(text);
+    final List<Term> terms = new ArrayList<>(spans.size());
+    for (final Span span : spans) {
+      terms.add(new Term(this, span.getCoveredText(text).toString(), span, null));
+    }
+    return terms;
+  }
+
+  /**
+   * Returns one {@link Term} per supplied token, attaching the matching part-of-speech tag so that
+   * {@link Dimension#LEMMA} can be computed. The terms have no source span.
+   *
+   * @param tokens The tokens.
+   * @param tags   The part-of-speech tag for each token; must be the same length as {@code tokens}.
+   * @return The terms.
+   * @throws IllegalArgumentException if {@code tokens} and {@code tags} differ in length.
+   */
+  public List<Term> analyze(String[] tokens, String[] tags) {
+    if (tokens.length != tags.length) {
+      throw new IllegalArgumentException("tokens and tags must be the same length, got "
+          + tokens.length + " and " + tags.length);
+    }
+    final List<Term> result = new ArrayList<>(tokens.length);
+    for (int index = 0; index < tokens.length; index++) {
+      result.add(new Term(this, tokens[index], null, tags[index])); // pre-tokenized: no span
+    }
+    return result;
+  }
+
+  /**
+   * {@return the configured dimensions that are computed eagerly, in canonical order} The list
+   * never includes {@link Dimension#ORIGINAL}, which is always present.
+   */
+  public List<Dimension> dimensions() {
+    return chain;
+  }
+
+  Dimension finalDimension() {
+    return finalDimension;
+  }
+
+  // Applies one dimension's transform to a single token value. Fails loudly when a token-level
+  // dimension was requested without the engine (or tag) it needs.
+  String apply(Dimension dimension, String input, String posTag) {
+    switch (dimension) {
+      case ORIGINAL:
+        return input;
+      case STEM:
+        if (stemmer == null) {
+          throw new IllegalStateException(
+              "Dimension STEM requires a Stemmer; configure it with builder().stem(...)");
+        }
+        return stemmer.stem(input).toString();
+      case LEMMA:
+        if (lemmatizer == null) {
+          throw new IllegalStateException(
+              "Dimension LEMMA requires a Lemmatizer; configure it with builder().lemmatize(...)");
+        }
+        if (posTag == null) {
+          throw new IllegalStateException(
+              "Dimension LEMMA requires a part-of-speech tag; use analyze(tokens, tags)");
+        }
+        return lemmatizer.lemmatize(new String[] {input}, new String[] {posTag})[0];
+      default:
+        // A builder override wins; otherwise the dimension's own default normalizer.
+        final CharSequenceNormalizer normalizer = transforms.containsKey(dimension)
+            ? transforms.get(dimension) : dimension.defaultNormalizer();
+        if (normalizer == null) {
+          throw new IllegalStateException("Dimension " + dimension + " has no default normalizer; "
+              + "configure it with builder().transform(" + dimension + ", ...)");
+        }
+        return normalizer.normalize(input).toString();
+    }
+  }
+
+  /** A builder for {@link TermAnalyzer}. */
+  public static final class Builder {
+
+    private final EnumSet<Dimension> chain = EnumSet.noneOf(Dimension.class);
+    private final EnumMap<Dimension, CharSequenceNormalizer> transforms =
+        new EnumMap<>(Dimension.class);
+    private Stemmer stemmer;
+    private Lemmatizer lemmatizer;
+    private WordTokenizer tokenizer = new WordTokenizer();
+
+    private Builder() {
+    }
+
+    /**
+     * Enables {@link Dimension#NFC}.
+     *
+     * @return this builder
+     */
+    public Builder nfc() {
+      chain.add(Dimension.NFC);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#NFKC}.
+     *
+     * @return this builder
+     */
+    public Builder nfkc() {
+      chain.add(Dimension.NFKC);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#WHITESPACE}.
+     *
+     * @return this builder
+     */
+    public Builder whitespace() {
+      chain.add(Dimension.WHITESPACE);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#WHITESPACE} with a specific normalizer, choosing the fold target and
+     * behavior. For a custom class and target use a {@link CharClass} method reference, for example
+     * {@code whitespace(CharClass.of(members, replacement)::collapse)}.
+     *
+     * @param normalizer The whitespace normalizer to use.
+     * @return this builder
+     */
+    public Builder whitespace(CharSequenceNormalizer normalizer) {
+      return transform(Dimension.WHITESPACE, normalizer);
+    }
+
+    /**
+     * Enables {@link Dimension#DASH}.
+     *
+     * @return this builder
+     */
+    public Builder dashes() {
+      chain.add(Dimension.DASH);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#DASH} with a specific normalizer (a custom dash set or target).
+     *
+     * @param normalizer The dash normalizer to use.
+     * @return this builder
+     */
+    public Builder dashes(CharSequenceNormalizer normalizer) {
+      return transform(Dimension.DASH, normalizer);
+    }
+
+    /**
+     * Enables {@link Dimension#CASE_FOLD}.
+     *
+     * @return this builder
+     */
+    public Builder caseFold() {
+      chain.add(Dimension.CASE_FOLD);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#CASE_FOLD} using the given locale's case rules (for example Turkish
+     * dotted/dotless i), instead of the default {@link Locale#ROOT}.
+     *
+     * @param locale The locale whose case rules to apply.
+     * @return this builder
+     */
+    public Builder caseFold(Locale locale) {
+      Objects.requireNonNull(locale, "locale");
+      return transform(Dimension.CASE_FOLD, CaseFoldCharSequenceNormalizer.getInstance(locale));
+    }
+
+    /**
+     * Enables {@link Dimension#ACCENT_FOLD}.
+     *
+     * @return this builder
+     */
+    public Builder accentFold() {
+      chain.add(Dimension.ACCENT_FOLD);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#ACCENT_FOLD} restricted to a specific set of scripts, instead of the
+     * default Latin/Greek/Cyrillic.
+     *
+     * @param foldScripts       The scripts whose diacritics to fold.
+     * @param foldStrokeLetters Whether to also fold stroke letters such as o-slash and l-stroke.
+     * @return this builder
+     */
+    public Builder accentFold(Set<Character.UnicodeScript> foldScripts, boolean foldStrokeLetters) {
+      return transform(Dimension.ACCENT_FOLD,
+          new AccentFoldCharSequenceNormalizer(foldScripts, foldStrokeLetters));
+    }
+
+    /**
+     * Enables {@link Dimension#CONFUSABLE_FOLD}.
+     *
+     * @return this builder
+     */
+    public Builder confusableFold() {
+      chain.add(Dimension.CONFUSABLE_FOLD);
+      return this;
+    }
+
+    /**
+     * Enables a character-level dimension with a specific normalizer, overriding its default (for
+     * example a locale-specific case fold for a language profile).
+     *
+     * @param dimension  The character-level dimension to enable.
+     * @param normalizer The normalizer to use for it.
+     * @return this builder
+     * @throws IllegalArgumentException if {@code dimension} is {@link Dimension#ORIGINAL},
+     *     {@link Dimension#STEM}, or {@link Dimension#LEMMA}.
+     */
+    public Builder transform(Dimension dimension, CharSequenceNormalizer normalizer) {
+      if (dimension == Dimension.ORIGINAL || dimension == Dimension.STEM
+          || dimension == Dimension.LEMMA) {
+        throw new IllegalArgumentException(
+            "transform(...) only applies to character-level dimensions, not " + dimension);
+      }
+      transforms.put(dimension, Objects.requireNonNull(normalizer, "normalizer"));
+      chain.add(dimension);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#STEM} through the given stemmer.
+     *
+     * @param value The stemmer.
+     * @return this builder
+     */
+    public Builder stem(Stemmer value) {
+      this.stemmer = Objects.requireNonNull(value, "stemmer");
+      chain.add(Dimension.STEM);
+      return this;
+    }
+
+    /**
+     * Enables {@link Dimension#LEMMA} through the given lemmatizer.
+     *
+     * @param value The lemmatizer.
+     * @return this builder
+     */
+    public Builder lemmatize(Lemmatizer value) {
+      this.lemmatizer = Objects.requireNonNull(value, "lemmatizer");
+      chain.add(Dimension.LEMMA);
+      return this;
+    }
+
+    /**
+     * Sets the tokenizer used by {@link TermAnalyzer#analyze(CharSequence)}.
+     *
+     * @param value The tokenizer.
+     * @return this builder
+     */
+    public Builder tokenizer(WordTokenizer value) {
+      this.tokenizer = Objects.requireNonNull(value, "tokenizer");
+      return this;
+    }
+
+    /**
+     * Sets the maximum token length of the tokenizer used by
+     * {@link TermAnalyzer#analyze(CharSequence)}. Convenience for
+     * {@code tokenizer(new WordTokenizer(maxTokenLength))}.
+     *
+     * @param maxTokenLength The maximum number of characters in a token.
+     * @return this builder
+     */
+    public Builder maxTokenLength(int maxTokenLength) {
+      this.tokenizer = new WordTokenizer(maxTokenLength);
+      return this;
+    }
+
+    /**
+     * {@return a new {@link TermAnalyzer} with this configuration}
+     */
+    public TermAnalyzer build() {
+      return new TermAnalyzer(this);
+    }
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/ConfusablesTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/ConfusablesTest.java
new file mode 100644
index 000000000..262fe5aa9
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/ConfusablesTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.util.normalizer;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ConfusablesTest {
+
+  private static String cp(int codePoint) {
+    return new String(Character.toChars(codePoint));
+  }
+
+  @Test
+  void testCyrillicLetterIsConfusableWithLatin() {
+    final String cyrillicA = cp(0x0430); // CYRILLIC SMALL LETTER A, looks like Latin 'a'
+    assertTrue(Confusables.confusable(cyrillicA, "a"));
+    assertFalse(Confusables.confusable(cyrillicA, "b"));
+  }
+
+  @Test
+  void testHomoglyphSpoofWordReducesToLatinSpelling() {
+    final String spoof = "p" + cp(0x0430) + "yp" + cp(0x0430) + "l"; // paypal with Cyrillic a's
+    assertTrue(Confusables.confusable(spoof, "paypal"));
+    assertEquals(Confusables.skeleton("paypal"), Confusables.skeleton(spoof));
+  }
+
+  @Test
+  void testHorizontalEllipsisFoldsToThreeFullStops() {
+    assertEquals(Confusables.skeleton("..."), Confusables.skeleton(cp(0x2026)));
+    assertTrue(Confusables.confusable(cp(0x2026), "..."));
+  }
+
+  @Test
+  void testDistinctWordsAreNotConfusable() {
+    assertFalse(Confusables.confusable("cat", "dog"));
+  }
+
+  @Test
+  void testSkeletonIsIdempotent() {
+    final String skeleton = Confusables.skeleton(cp(0x0430) + "bc");
+    assertEquals(skeleton, Confusables.skeleton(skeleton));
+  }
+
+  @Test
+  void testNormalizerProducesTheSkeleton() {
+    final String spoof = "p" + cp(0x0430) + "yp" + cp(0x0430) + "l";
+    assertEquals(Confusables.skeleton(spoof),
+        ConfusableSkeletonCharSequenceNormalizer.getInstance().normalize(spoof).toString());
+  }
+
+  @Test
+  void testMultipleCyrillicLookalikesFold() {
+    final String spoof = "d" + cp(0x0430) + "t" + cp(0x0430); // "data" with Cyrillic a's
+    assertEquals(Confusables.skeleton("data"), Confusables.skeleton(spoof));
+  }
+
+  @Test
+  void testTermConfusableFoldDimension() {
+    final String spoof = "p" + cp(0x0430) + "yp" + cp(0x0430) + "l";
+    final TermAnalyzer analyzer = TermAnalyzer.builder().confusableFold().build();
+    assertEquals(Confusables.skeleton("paypal"), analyzer.analyze(spoof).get(0).normalized());
+  }
+}
diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/TermAnalyzerTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/TermAnalyzerTest.java
new file mode 100644
index 000000000..56f16899d
--- /dev/null
+++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/TermAnalyzerTest.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.util.normalizer;
+
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.lemmatizer.Lemmatizer;
+import opennlp.tools.stemmer.PorterStemmer;
+import opennlp.tools.util.Span;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class TermAnalyzerTest {
+
+  private static String cp(int codePoint) {
+    return new String(Character.toChars(codePoint));
+  }
+
+  @Test
+  void testNoDimensionsLeavesTokenUnchanged() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().build();
+    final Term term = analyzer.analyze("Hello").get(0);
+    assertEquals("Hello", term.original());
+    assertEquals("Hello", term.normalized());
+    assertEquals("Hello", term.peel());
+    assertEquals(List.of(), analyzer.dimensions());
+  }
+
+  @Test
+  void testChainAppliesInCanonicalOrderRegardlessOfBuilderOrder() {
+    // accentFold added before caseFold, but the canonical order is caseFold then accentFold.
+    final TermAnalyzer analyzer = TermAnalyzer.builder().accentFold().caseFold().build();
+    assertEquals(List.of(Dimension.CASE_FOLD, Dimension.ACCENT_FOLD), analyzer.dimensions());
+    final String input = "CAF" + cp(0x00C9); // CAFE with capital acute E
+    final Term term = analyzer.analyze(input).get(0);
+    assertEquals(input, term.original());
+    assertEquals("cafe", term.normalized());
+    assertEquals("caf" + cp(0x00E9), term.peel()); // before accent folding: lower-case, acute kept
+  }
+
+  @Test
+  void testStemIsTheTopLayer() {
+    final TermAnalyzer analyzer =
+        TermAnalyzer.builder().caseFold().stem(new PorterStemmer()).build();
+    final Term term = analyzer.analyze("Running").get(0);
+    assertEquals("running", term.peel()); // case-folded form, before stemming
+    assertEquals("run", term.normalized());
+    assertEquals("run", term.at(Dimension.STEM));
+  }
+
+  @Test
+  void testUnconfiguredCharDimensionComputedLazily() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().build();
+    final Term term = analyzer.analyze("HELLO").get(0);
+    assertEquals("HELLO", term.normalized());
+    assertEquals("hello", term.at(Dimension.CASE_FOLD)); // lazily added on top of the final form
+  }
+
+  @Test
+  void testStemDimensionWithoutStemmerFailsLoudly() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().caseFold().build();
+    final Term term = analyzer.analyze("running").get(0);
+    assertThrows(IllegalStateException.class, () -> term.at(Dimension.STEM));
+  }
+
+  @Test
+  void testLemmaWithoutLemmatizerFailsLoudly() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().build();
+    final Term term = analyzer.analyze("running").get(0);
+    assertThrows(IllegalStateException.class, () -> term.at(Dimension.LEMMA));
+  }
+
+  @Test
+  void testAnalyzeTextProducesSpans() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().caseFold().build();
+    final List<Term> terms = analyzer.analyze("The Cats");
+    assertEquals(2, terms.size());
+    assertEquals("The", terms.get(0).original());
+    assertEquals("the", terms.get(0).normalized());
+    assertEquals(new Span(0, 3), terms.get(0).span());
+    assertEquals("Cats", terms.get(1).original());
+    assertEquals(new Span(4, 8), terms.get(1).span());
+  }
+
+  @Test
+  void testAnalyzeTokensHasNoSpan() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().caseFold().build();
+    final List<Term> terms = analyzer.analyze(new String[] {"Cats"}, new String[] {"NNS"});
+    assertNull(terms.get(0).span());
+    assertEquals("cats", terms.get(0).normalized());
+  }
+
+  @Test
+  void testAnalyzeTokensRejectsLengthMismatch() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().build();
+    assertThrows(IllegalArgumentException.class,
+        () -> analyzer.analyze(new String[] {"a", "b"}, new String[] {"X"}));
+  }
+
+  @Test
+  void testTransformRejectsNonCharacterDimension() {
+    assertThrows(IllegalArgumentException.class, () -> TermAnalyzer.builder()
+        .transform(Dimension.STEM, CaseFoldCharSequenceNormalizer.getInstance()));
+  }
+
+  @Test
+  void testLemmaWithLemmatizerAndTag() {
+    final Lemmatizer lemmatizer = new Lemmatizer() {
+      @Override
+      public String[] lemmatize(String[] tokens, String[] tags) {
+        return new String[] {"be"};
+      }
+
+      @Override
+      public List<List<String>> lemmatize(List<String> tokens, List<String> tags) {
+        return List.of(List.of("be"));
+      }
+    };
+    final TermAnalyzer analyzer =
+        TermAnalyzer.builder().caseFold().lemmatize(lemmatizer).build();
+    final Term term = analyzer.analyze(new String[] {"was"}, new String[] {"VBD"}).get(0);
+    assertEquals("be", term.normalized());
+  }
+
+  @Test
+  void testConfusableFoldComposesWithCaseFold() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().caseFold().confusableFold().build();
+    final String spoof = "P" + cp(0x0430) + "yp" + cp(0x0430) + "l"; // Paypal with Cyrillic a's
+    assertEquals(Confusables.skeleton("paypal"), analyzer.analyze(spoof).get(0).normalized());
+  }
+
+  @Test
+  void testAtIsMemoized() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder().build();
+    final Term term = analyzer.analyze("HELLO").get(0);
+    final String first = term.at(Dimension.CASE_FOLD);
+    assertSame(first, term.at(Dimension.CASE_FOLD));
+  }
+
+  @Test
+  void testWhitespaceTargetIsConfigurable() {
+    final CharClass lineFold = CharClass.of(CodePointSet.of('\n', '\t'), '\n');
+    final TermAnalyzer analyzer = TermAnalyzer.builder().whitespace(lineFold::collapse).build();
+    final Term term = analyzer.analyze(new String[] {"a\n\n\tb"}, new String[] {"X"}).get(0);
+    assertEquals("a\nb", term.normalized());
+  }
+
+  @Test
+  void testCaseFoldLocaleAppliesTurkishRules() {
+    final TermAnalyzer analyzer =
+        TermAnalyzer.builder().caseFold(Locale.forLanguageTag("tr")).build();
+    assertEquals(cp(0x0131), analyzer.analyze("I").get(0).normalized()); // dotless lowercase i
+  }
+
+  @Test
+  void testAccentFoldScopeFoldsLatin() {
+    final TermAnalyzer analyzer = TermAnalyzer.builder()
+        .accentFold(Set.of(Character.UnicodeScript.LATIN), false).build();
+    assertEquals("cafe", analyzer.analyze("caf" + cp(0x00E9)).get(0).normalized()); // cafe + acute
+  }
+
+  @Test
+  void testMaxTokenLengthChopsTokens() {
+    final List<Term> terms = TermAnalyzer.builder().maxTokenLength(3).build().analyze("abcdefg");
+    assertEquals(3, terms.size());
+    assertEquals("abc", terms.get(0).original());
+    assertEquals("def", terms.get(1).original());
+    assertEquals("g", terms.get(2).original());
+  }
+
+  @Test
+  void testAnalyzeEmptyTextProducesNoTerms() {
+    assertEquals(List.of(), TermAnalyzer.builder().caseFold().build().analyze(""));
+  }
+
+  @Test
+  void testWhitespaceOnlyInputHasNoWordTerms() {
+    assertEquals(List.of(), TermAnalyzer.builder().build().analyze("   \t  "));
+  }
+
+  @Test
+  void testAtDimensionBelowFinalIsAppliedOnTop() {
+    // Final dimension is STEM; asking for NFC applies it on top of the stem (documented behavior).
+    final TermAnalyzer analyzer =
+        TermAnalyzer.builder().caseFold().stem(new PorterStemmer()).build();
+    final Term term = analyzer.analyze("Running").get(0);
+    assertEquals("run", term.normalized());
+    assertEquals("run", term.at(Dimension.NFC));
+  }
+}