From e1ecba0aba3e12ddd36fdfbf45ad3860f9b70a86 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 16 May 2026 20:08:32 -0700 Subject: [PATCH 1/4] fix(rdf): scope glossary graph by membership and surface glossary labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /rdf/glossary/graph endpoint silently ignored its `glossaryId` filter and returned every term in every glossary, because the SPARQL query bound `?glossary` via `OPTIONAL { ?term1 om:belongsTo ?glossary }` while the JSON-LD context (governance.jsonld) maps GlossaryTerm.glossary to `om:belongsToGlossary`. The downstream `FILTER(?glossary = <…>)` was a no-op, and the UI hierarchy view rendered every glossary's terms grouped by their raw UUID — most visibly for cross-glossary scenarios. Backend changes (openmetadata-service): - RdfRepository: use the correct `om:belongsToGlossary` predicate and make membership required (not OPTIONAL + FILTER) when scoped, so the join drops non-matching terms instead of relying on filter semantics. - Emit `group` (glossary name via COALESCE on skos:prefLabel / rdfs:label) and `glossaryId` on every term node; fall back to a DB lookup so a scoped response always carries the glossary label. - Read term names from `rdfs:label` (the actual predicate base.jsonld uses for `name`) instead of `om:name`, which is never written. - Backfill membership when a node is upgraded from a term2 (edge target) on one row to a term1 (primary) on a later row. - removeGlossaryTermRelation deletes both directions so the reverse triple written by EntityRepository.addRelationship doesn't linger. - addRelationship (generic) uses additive INSERT DATA instead of storeEntity, which destructively swept rdf:type, rdfs:label and om:belongsToGlossary off the source entity when called with a relationship-only model. - Database fallback in getGlossaryTermGraphFromDatabase now scopes in-memory; ListFilter has no first-class `glossary` predicate so addQueryParam was a silent no-op that leaked every term. - RdfUpdater short-circuits glossaryTerm⇔glossaryTerm RELATED_TO; the typed addGlossaryTermRelation / removeGlossaryTermRelation path owns those edges with the precise predicate. - RdfPropertyMapper skips blank `xsd:string` literals so empty displayNames no longer materialize as `skos:prefLabel ""` and win over `rdfs:label` at read time. UI changes (openmetadata-ui): - convertRdfGraphToOntologyGraph prefers the explicit `node.glossaryId` from the response over the FQN-prefix heuristic. - hierarchyGraphBuilder falls back to a term node's `group` when the glossary id can't be resolved against the caller's glossary list, so the combo container label is never a raw UUID. - rdfAPI.interface declares the new `glossaryId` field. Tests (openmetadata-integration-tests): - RdfGlossaryGraphIT covers nine scenarios end-to-end, including membership/labels through add+delete relation cycles, displayName set/clear lifecycle, two relation types between the same term pair, relation-type change (delete + add new), cross-glossary relations, and the DB-fallback path for an empty glossary. --- .../it/tests/RdfGlossaryGraphIT.java | 780 ++++++++++++++++++ .../service/rdf/RdfRepository.java | 250 ++++-- .../openmetadata/service/rdf/RdfUpdater.java | 22 + .../rdf/translator/RdfPropertyMapper.java | 12 +- .../OntologyExplorer/utils/graphBuilders.ts | 8 +- .../utils/hierarchyGraphBuilder.ts | 10 +- .../resources/ui/src/rest/rdfAPI.interface.ts | 6 + 7 files changed, 1019 insertions(+), 69 deletions(-) create mode 100644 openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RdfGlossaryGraphIT.java diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RdfGlossaryGraphIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RdfGlossaryGraphIT.java new file mode 100644 index 000000000000..4aa236920b3e --- /dev/null +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RdfGlossaryGraphIT.java @@ -0,0 +1,780 @@ +/* + * Copyright 2025 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.it.tests; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; +import java.util.HashSet; +import java.util.Set; +import java.util.UUID; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.Isolated; +import org.openmetadata.it.bootstrap.TestSuiteBootstrap; +import org.openmetadata.it.factories.GlossaryTermTestFactory; +import org.openmetadata.it.factories.GlossaryTestFactory; +import org.openmetadata.it.util.SdkClients; +import org.openmetadata.it.util.TestNamespace; +import org.openmetadata.it.util.TestNamespaceExtension; +import org.openmetadata.schema.api.configuration.rdf.RdfConfiguration; +import org.openmetadata.schema.entity.data.Glossary; +import org.openmetadata.schema.entity.data.GlossaryTerm; +import org.openmetadata.service.rdf.RdfUpdater; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.utility.DockerImageName; + +/** + * Integration tests for {@code GET /v1/rdf/glossary/graph} (the SPARQL-backed + * glossary term graph endpoint). + * + *

Regression for the Novartis issue where the {@code glossaryId} filter was + * silently ignored — the SPARQL query bound {@code ?glossary} via + * {@code OPTIONAL { ?term1 om:belongsTo ?glossary }}, but the predicate used + * when terms are written into RDF is {@code om:belongsToGlossary}. Result: + * {@code ?glossary} was always unbound, the downstream + * {@code FILTER(?glossary = <…>)} did not filter, and every term from every + * glossary came back. The UI then rendered group containers for every glossary + * instead of just the requested one. + * + *

See {@link GlossaryOntologyExportIT} for the parallelization and Fuseki + * container rationale — same pattern applies here. + */ +@Isolated +@Execution(ExecutionMode.SAME_THREAD) +@ExtendWith(TestNamespaceExtension.class) +public class RdfGlossaryGraphIT { + + private static final Logger LOG = LoggerFactory.getLogger(RdfGlossaryGraphIT.class); + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final HttpClient HTTP_CLIENT = + HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(); + + private static final String FUSEKI_IMAGE = "stain/jena-fuseki:latest"; + private static final int FUSEKI_PORT = 3030; + private static final String FUSEKI_DATASET = "openmetadata"; + private static final String FUSEKI_ADMIN_PASSWORD = "test-admin"; + + private static GenericContainer localFusekiContainer; + + @BeforeAll + static void enableRdf() { + String fusekiEndpoint; + if (TestSuiteBootstrap.isFusekiEnabled()) { + fusekiEndpoint = TestSuiteBootstrap.getFusekiEndpoint(); + } else { + localFusekiContainer = + new GenericContainer<>(DockerImageName.parse(FUSEKI_IMAGE)) + .withExposedPorts(FUSEKI_PORT) + .withEnv("ADMIN_PASSWORD", FUSEKI_ADMIN_PASSWORD) + .withEnv("FUSEKI_DATASET_1", FUSEKI_DATASET) + .withTmpFs(java.util.Map.of("/fuseki/databases", "rw,size=256m,uid=100,gid=101")) + .waitingFor( + Wait.forHttp("/$/ping") + .forPort(FUSEKI_PORT) + .forStatusCode(200) + .withStartupTimeout(Duration.ofMinutes(2))); + localFusekiContainer.start(); + fusekiEndpoint = + String.format( + "http://%s:%d/%s", + localFusekiContainer.getHost(), + localFusekiContainer.getMappedPort(FUSEKI_PORT), + FUSEKI_DATASET); + LOG.info("Started local Fuseki container: {}", fusekiEndpoint); + } + + RdfConfiguration rdfConfig = new RdfConfiguration(); + rdfConfig.setEnabled(true); + rdfConfig.setBaseUri(URI.create("https://open-metadata.org/")); + rdfConfig.setStorageType(RdfConfiguration.StorageType.FUSEKI); + rdfConfig.setRemoteEndpoint(URI.create(fusekiEndpoint)); + rdfConfig.setUsername("admin"); + rdfConfig.setPassword(FUSEKI_ADMIN_PASSWORD); + rdfConfig.setDataset(FUSEKI_DATASET); + RdfUpdater.initialize(rdfConfig); + } + + @AfterAll + static void disableRdf() { + RdfUpdater.disable(); + if (localFusekiContainer != null) { + localFusekiContainer.stop(); + localFusekiContainer = null; + } + } + + @Test + void glossaryIdFilterScopesGraphToRequestedGlossary(TestNamespace ns) throws Exception { + Glossary glossaryA = GlossaryTestFactory.createWithName(ns, "graphScopeA"); + Glossary glossaryB = GlossaryTestFactory.createWithName(ns, "graphScopeB"); + + GlossaryTerm termA1 = GlossaryTermTestFactory.createWithName(ns, glossaryA, "a1"); + GlossaryTerm termA2 = GlossaryTermTestFactory.createWithName(ns, glossaryA, "a2"); + GlossaryTerm termB1 = GlossaryTermTestFactory.createWithName(ns, glossaryB, "b1"); + GlossaryTerm termB2 = GlossaryTermTestFactory.createWithName(ns, glossaryB, "b2"); + + // Wait for RDF projection of all four terms before asserting against SPARQL. + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> { + Set ids = nodeIds(fetchGlossaryGraph(null)); + assertTrue(ids.contains(termA1.getId()), "RDF should contain termA1"); + assertTrue(ids.contains(termA2.getId()), "RDF should contain termA2"); + assertTrue(ids.contains(termB1.getId()), "RDF should contain termB1"); + assertTrue(ids.contains(termB2.getId()), "RDF should contain termB2"); + }); + + JsonNode scoped = fetchGlossaryGraph(glossaryA.getId()); + Set scopedIds = nodeIds(scoped); + + assertTrue( + scopedIds.contains(termA1.getId()), + "Scoped graph should contain termA1 from the requested glossary"); + assertTrue( + scopedIds.contains(termA2.getId()), + "Scoped graph should contain termA2 from the requested glossary"); + assertFalse( + scopedIds.contains(termB1.getId()), + "Scoped graph must NOT contain termB1 from a different glossary"); + assertFalse( + scopedIds.contains(termB2.getId()), + "Scoped graph must NOT contain termB2 from a different glossary"); + } + + @Test + void scopedResponseCarriesGlossaryNameAndIdPerNode(TestNamespace ns) throws Exception { + // Regression for the second symptom of the same bug: the UI's hierarchy + // view fell back to rendering raw UUIDs as the group container label + // because the RDF response did not carry the parent glossary's name/id + // per term. The fix surfaces both `group` (glossary name) and `glossaryId` + // on every term node so the UI can resolve the label without depending on + // the caller's glossary listing. + Glossary glossary = GlossaryTestFactory.createWithName(ns, "labeled"); + GlossaryTerm term = GlossaryTermTestFactory.createWithName(ns, glossary, "t1"); + + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> + assertTrue( + nodeIds(fetchGlossaryGraph(glossary.getId())).contains(term.getId()), + "Term should be projected to RDF before assertion")); + + JsonNode scoped = fetchGlossaryGraph(glossary.getId()); + JsonNode termNode = null; + for (JsonNode node : scoped.get("nodes")) { + JsonNode idNode = node.get("id"); + if (idNode != null && term.getId().toString().equals(idNode.asText())) { + termNode = node; + break; + } + } + assertNotNull(termNode, "Scoped response should include the created term"); + + JsonNode groupNode = termNode.get("group"); + assertNotNull( + groupNode, "Term node should carry a `group` field with the parent glossary's name"); + assertEquals( + glossary.getName(), + groupNode.asText(), + "Group label should match the parent glossary's name"); + + JsonNode glossaryIdNode = termNode.get("glossaryId"); + assertNotNull(glossaryIdNode, "Term node should carry the parent glossary's id"); + assertEquals(glossary.getId().toString(), glossaryIdNode.asText()); + } + + @Test + void termNodeLabelFallsBackToNameWhenDisplayNameIsAbsent(TestNamespace ns) throws Exception { + // Regression: the SPARQL query bound the term label via om:name, but the + // JSON-LD context (base.jsonld) maps the `name` field to rdfs:label, so + // om:name is never written. Terms without a displayName (skos:prefLabel) + // therefore came back with a null label, and the UI rendered the entity + // UUID instead of the human name. Fix reads rdfs:label so every term has + // a real label whether or not a displayName was supplied. + Glossary glossary = GlossaryTestFactory.createWithName(ns, "labels"); + GlossaryTerm term = GlossaryTermTestFactory.createWithName(ns, glossary, "noDisplayName"); + + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> + assertTrue( + nodeIds(fetchGlossaryGraph(glossary.getId())).contains(term.getId()), + "Term should be projected to RDF before assertion")); + + JsonNode scoped = fetchGlossaryGraph(glossary.getId()); + JsonNode termNode = null; + for (JsonNode node : scoped.get("nodes")) { + JsonNode idNode = node.get("id"); + if (idNode != null && term.getId().toString().equals(idNode.asText())) { + termNode = node; + break; + } + } + assertNotNull(termNode, "Scoped response should include the created term"); + + JsonNode labelNode = termNode.get("label"); + assertNotNull(labelNode, "Term node should carry a label"); + String label = labelNode.asText(); + assertFalse(label.isBlank(), "Label must not be blank — empty prefLabel should not win"); + assertFalse( + label.equals(term.getId().toString()), + "Label must not fall through to the entity UUID when the term name is available"); + assertEquals( + term.getName(), + label, + "Label should be the term's name (rdfs:label) when no displayName is set"); + } + + @Test + void glossaryIdFilterReturnsEmptyForGlossaryWithNoTerms(TestNamespace ns) throws Exception { + // A second glossary with terms exists so the SPARQL store is non-empty + // overall; without the predicate fix the result for the empty glossary + // would still leak the populated glossary's terms. + Glossary populatedGlossary = GlossaryTestFactory.createWithName(ns, "populated"); + GlossaryTerm populatedTerm = + GlossaryTermTestFactory.createWithName(ns, populatedGlossary, "p1"); + + Glossary emptyGlossary = GlossaryTestFactory.createWithName(ns, "empty"); + + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> + assertTrue( + nodeIds(fetchGlossaryGraph(null)).contains(populatedTerm.getId()), + "Populated glossary's term should be projected to RDF")); + + JsonNode scoped = fetchGlossaryGraph(emptyGlossary.getId()); + Set scopedIds = nodeIds(scoped); + + assertFalse( + scopedIds.contains(populatedTerm.getId()), + "Scoped graph for the empty glossary must NOT leak terms from another glossary"); + } + + @Test + void labelTracksDisplayNameLifecycle(TestNamespace ns) throws Exception { + // Round-trip the displayName field through the three states that the UI + // can produce: never set → set to a non-empty value → cleared back to + // empty. The effective label should follow in BOTH the RDF-backed graph + // response and the DB-backed term-detail API: + // - never set: the term name (rdfs:label in RDF; .name in DB) + // - set: the display name (skos:prefLabel / .displayName) + // - cleared to "": the term name again, NOT the empty string + Glossary glossary = GlossaryTestFactory.createWithName(ns, "labelLifecycle"); + GlossaryTerm term = GlossaryTermTestFactory.createWithName(ns, glossary, "term"); + + awaitTermInGraph(glossary.getId(), term.getId()); + + // 1. No displayName set → label is the term name. + assertEffectiveLabel(glossary.getId(), term.getId(), term.getName()); + + // 2. Set a display name → label switches to it. + patchTerm( + term.getId(), "[{\"op\":\"add\",\"path\":\"/displayName\",\"value\":\"Pretty Name\"}]"); + awaitEffectiveLabel(glossary.getId(), term.getId(), "Pretty Name"); + + // 3. Update the display name → label tracks the new value. + patchTerm( + term.getId(), "[{\"op\":\"replace\",\"path\":\"/displayName\",\"value\":\"Renamed\"}]"); + awaitEffectiveLabel(glossary.getId(), term.getId(), "Renamed"); + + // 4. Clear the display name (set to empty) → label MUST fall back to the + // term name, not surface as a blank string. This is the symptom we hit in + // the local stack: COGS had skos:prefLabel="" winning over its rdfs:label + // and the UI rendered an empty box. + patchTerm(term.getId(), "[{\"op\":\"replace\",\"path\":\"/displayName\",\"value\":\"\"}]"); + awaitEffectiveLabel(glossary.getId(), term.getId(), term.getName()); + } + + @Test + void glossaryMembershipSurvivesAddAndDeleteRelations(TestNamespace ns) throws Exception { + // Regression for the term-mutation projection path: each call to add or + // delete a relation re-projects the term to RDF, and a regression in that + // path can drop the om:belongsToGlossary / rdfs:label triples — at which + // point the scoped graph query (which anchors on those) silently drops + // the term. Membership is asserted via both the RDF graph endpoint and + // the DB-backed term API so the invariant holds in either read mode. + Glossary glossary = GlossaryTestFactory.createWithName(ns, "membership"); + GlossaryTerm a = GlossaryTermTestFactory.createWithName(ns, glossary, "alpha"); + GlossaryTerm b = GlossaryTermTestFactory.createWithName(ns, glossary, "beta"); + + awaitTermInGraph(glossary.getId(), a.getId()); + awaitTermInGraph(glossary.getId(), b.getId()); + + addRelation(a.getId(), b.getId(), "relatedTo"); + awaitMembershipAndLabel(glossary, a, a.getName()); + awaitMembershipAndLabel(glossary, b, b.getName()); + awaitRelatedTermInDb(a.getId(), b.getId(), "relatedTo"); + + deleteRelation(a.getId(), b.getId(), "relatedTo"); + awaitMembershipAndLabel(glossary, a, a.getName()); + awaitMembershipAndLabel(glossary, b, b.getName()); + awaitRelatedTermAbsentInDb(a.getId(), b.getId()); + } + + @Test + void sameTermPairCanHoldMultipleRelationTypesIndependently(TestNamespace ns) throws Exception { + // Per PR #28172 the (fromId, toId, relation, relationType) PK lets the + // same term pair carry multiple typed relations. Verify that adding a + // second relation type does NOT remove the first, and that removing one + // type leaves the other intact — both in the DB term record and in the + // RDF graph response. + Glossary glossary = GlossaryTestFactory.createWithName(ns, "multiRel"); + GlossaryTerm a = GlossaryTermTestFactory.createWithName(ns, glossary, "alpha"); + GlossaryTerm b = GlossaryTermTestFactory.createWithName(ns, glossary, "beta"); + + awaitTermInGraph(glossary.getId(), a.getId()); + awaitTermInGraph(glossary.getId(), b.getId()); + + addRelation(a.getId(), b.getId(), "relatedTo"); + awaitRelatedTermInDb(a.getId(), b.getId(), "relatedTo"); + + addRelation(a.getId(), b.getId(), "synonym"); + // Both types must coexist on the DB term. + awaitRelatedTermInDb(a.getId(), b.getId(), "relatedTo"); + awaitRelatedTermInDb(a.getId(), b.getId(), "synonym"); + // And the RDF graph must surface both edges. + awaitEdgeBetween(glossary.getId(), a.getId(), b.getId(), "relatedTo"); + awaitEdgeBetween(glossary.getId(), a.getId(), b.getId(), "synonym"); + + // Removing one type must leave the other in place. + deleteRelation(a.getId(), b.getId(), "relatedTo"); + awaitRelatedTermOfTypeAbsentInDb(a.getId(), b.getId(), "relatedTo"); + awaitRelatedTermInDb(a.getId(), b.getId(), "synonym"); + awaitEdgeBetween(glossary.getId(), a.getId(), b.getId(), "synonym"); + + // Membership and label survive all of it. + awaitMembershipAndLabel(glossary, a, a.getName()); + awaitMembershipAndLabel(glossary, b, b.getName()); + } + + @Test + void changingRelationTypeReplacesOldEdgeWithNewType(TestNamespace ns) throws Exception { + // Simulates the UI "edit relation type" flow as delete-then-add. Verify + // the resulting state is exactly one edge of the new type, no orphans. + Glossary glossary = GlossaryTestFactory.createWithName(ns, "swapRel"); + GlossaryTerm a = GlossaryTermTestFactory.createWithName(ns, glossary, "from"); + GlossaryTerm b = GlossaryTermTestFactory.createWithName(ns, glossary, "to"); + + awaitTermInGraph(glossary.getId(), a.getId()); + awaitTermInGraph(glossary.getId(), b.getId()); + + addRelation(a.getId(), b.getId(), "relatedTo"); + awaitEdgeBetween(glossary.getId(), a.getId(), b.getId(), "relatedTo"); + + deleteRelation(a.getId(), b.getId(), "relatedTo"); + addRelation(a.getId(), b.getId(), "broader"); + + awaitRelatedTermInDb(a.getId(), b.getId(), "broader"); + awaitEdgeBetween(glossary.getId(), a.getId(), b.getId(), "broader"); + + // The previous relation type must NOT linger in either layer. + Awaitility.await() + .atMost(Duration.ofSeconds(15)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> { + JsonNode term = fetchTerm(a.getId()); + for (JsonNode r : term.path("relatedTerms")) { + if (b.getId().toString().equals(r.path("term").path("id").asText(null))) { + assertFalse( + "relatedTo".equals(r.path("relationType").asText(null)), + "Old relationType must be gone after delete+add"); + } + } + assertFalse( + hasEdge(fetchGlossaryGraph(glossary.getId()), a.getId(), b.getId(), "relatedTo"), + "Old edge type must not linger in the RDF graph"); + }); + + // Membership/label still intact. + awaitMembershipAndLabel(glossary, a, a.getName()); + awaitMembershipAndLabel(glossary, b, b.getName()); + } + + @Test + void crossGlossaryRelationDoesNotLeakIntoOtherGlossaryScope(TestNamespace ns) throws Exception { + // A cross-glossary relation creates an outbound edge but should not make + // the source term a "member" of the target glossary. Scoping by the + // target glossary's id must still exclude the foreign source term as a + // primary node. + Glossary glossaryA = GlossaryTestFactory.createWithName(ns, "crossA"); + Glossary glossaryB = GlossaryTestFactory.createWithName(ns, "crossB"); + GlossaryTerm a1 = GlossaryTermTestFactory.createWithName(ns, glossaryA, "a1"); + GlossaryTerm b1 = GlossaryTermTestFactory.createWithName(ns, glossaryB, "b1"); + + awaitTermInGraph(glossaryA.getId(), a1.getId()); + awaitTermInGraph(glossaryB.getId(), b1.getId()); + + addRelation(a1.getId(), b1.getId(), "relatedTo"); + + // a1 must still be a first-class member of glossaryA after the cross-glossary relation. + awaitMembershipAndLabel(glossaryA, a1, a1.getName()); + + // a1 may surface inside glossaryB's scoped graph as a term2 (edge target), + // but it must NEVER appear as a primary node attributed to glossaryB — + // i.e. it must not carry glossaryB's id / name. + JsonNode scopedB = fetchGlossaryGraph(glossaryB.getId()); + for (JsonNode node : scopedB.get("nodes")) { + if (a1.getId().toString().equals(node.path("id").asText(null))) { + String group = node.path("group").asText(null); + String gid = node.path("glossaryId").asText(null); + assertFalse( + glossaryB.getName().equals(group), + "Foreign term must not be attributed to the scoped glossary's name"); + assertFalse( + glossaryB.getId().toString().equals(gid), + "Foreign term must not carry the scoped glossary's id"); + } + } + } + + private void awaitTermInGraph(UUID glossaryId, UUID termId) { + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> + assertTrue( + nodeIds(fetchGlossaryGraph(glossaryId)).contains(termId), + () -> "Term " + termId + " should be projected to RDF")); + } + + /** + * Resolve the term's effective UI label the same way the UI does: prefer a + * non-blank displayName, otherwise fall back to the term name. Asserted on + * the DB-backed term API (works whether RDF is enabled or not). + */ + private static String effectiveLabel(JsonNode term) { + String displayName = term.path("displayName").asText(null); + if (displayName != null && !displayName.isBlank()) { + return displayName; + } + return term.path("name").asText(null); + } + + /** Assert the term's effective label via BOTH the RDF graph and the DB term API. */ + private void assertEffectiveLabel(UUID glossaryId, UUID termId, String expected) + throws Exception { + JsonNode dbTerm = fetchTerm(termId); + assertEquals(expected, effectiveLabel(dbTerm), "DB term effective label should match expected"); + JsonNode graphNode = findNode(fetchGlossaryGraph(glossaryId), termId); + assertEquals( + expected, + graphNode.path("label").asText(null), + "RDF graph node label should match expected"); + } + + /** Same as {@link #assertEffectiveLabel} but polls until consistent (post-mutation). */ + private void awaitEffectiveLabel(UUID glossaryId, UUID termId, String expected) { + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted(() -> assertEffectiveLabel(glossaryId, termId, expected)); + } + + /** + * After a mutation, assert the term still appears scoped to the correct + * glossary (with proper group/glossaryId/label) in the RDF graph AND that + * the DB still reports the term as a member of the same glossary. + */ + private void awaitMembershipAndLabel(Glossary glossary, GlossaryTerm term, String expectedLabel) { + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> { + JsonNode dbTerm = fetchTerm(term.getId()); + JsonNode dbGlossary = dbTerm.path("glossary"); + assertEquals( + glossary.getId().toString(), + dbGlossary.path("id").asText(null), + "DB term must report correct parent glossary id"); + assertEquals( + glossary.getName(), + dbGlossary.path("name").asText(null), + "DB term must report correct parent glossary name"); + assertEquals(expectedLabel, effectiveLabel(dbTerm)); + + JsonNode graphNode = findNode(fetchGlossaryGraph(glossary.getId()), term.getId()); + assertNotNull( + graphNode.get("id"), () -> "Term " + term.getId() + " missing from scoped graph"); + assertEquals(glossary.getName(), graphNode.path("group").asText(null)); + assertEquals(glossary.getId().toString(), graphNode.path("glossaryId").asText(null)); + assertEquals(expectedLabel, graphNode.path("label").asText(null)); + }); + } + + private void awaitRelatedTermInDb(UUID fromTermId, UUID toTermId, String expectedRelationType) { + Awaitility.await() + .atMost(Duration.ofSeconds(15)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> { + JsonNode term = fetchTerm(fromTermId); + JsonNode related = term.path("relatedTerms"); + boolean found = false; + for (JsonNode r : related) { + if (toTermId.toString().equals(r.path("term").path("id").asText(null)) + && expectedRelationType.equals(r.path("relationType").asText(null))) { + found = true; + break; + } + } + assertTrue( + found, + () -> + "Expected DB term " + + fromTermId + + " to have relatedTerm " + + toTermId + + " of type " + + expectedRelationType); + }); + } + + private boolean hasEdge(JsonNode graph, UUID fromId, UUID toId, String relationType) { + JsonNode edges = graph.path("edges"); + String from = fromId.toString(); + String to = toId.toString(); + for (JsonNode e : edges) { + String f = e.path("from").asText(null); + String t = e.path("to").asText(null); + String type = e.path("relationType").asText(null); + boolean idsMatch = (from.equals(f) && to.equals(t)) || (from.equals(t) && to.equals(f)); + if (idsMatch && relationType.equalsIgnoreCase(type)) { + return true; + } + } + return false; + } + + private void awaitEdgeBetween(UUID glossaryId, UUID fromId, UUID toId, String relationType) { + Awaitility.await() + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> + assertTrue( + hasEdge(fetchGlossaryGraph(glossaryId), fromId, toId, relationType), + () -> + "Expected edge " + + fromId + + " -[" + + relationType + + "]-> " + + toId + + " in RDF graph")); + } + + private void awaitRelatedTermOfTypeAbsentInDb( + UUID fromTermId, UUID toTermId, String relationType) { + Awaitility.await() + .atMost(Duration.ofSeconds(15)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> { + JsonNode term = fetchTerm(fromTermId); + for (JsonNode r : term.path("relatedTerms")) { + if (toTermId.toString().equals(r.path("term").path("id").asText(null)) + && relationType.equals(r.path("relationType").asText(null))) { + fail( + "DB term " + + fromTermId + + " should no longer reference " + + toTermId + + " of type " + + relationType); + } + } + }); + } + + private void awaitRelatedTermAbsentInDb(UUID fromTermId, UUID toTermId) { + Awaitility.await() + .atMost(Duration.ofSeconds(15)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted( + () -> { + JsonNode term = fetchTerm(fromTermId); + for (JsonNode r : term.path("relatedTerms")) { + assertFalse( + toTermId.toString().equals(r.path("term").path("id").asText(null)), + () -> + "DB term " + + fromTermId + + " should no longer reference deleted relation to " + + toTermId); + } + }); + } + + private JsonNode fetchTerm(UUID termId) throws Exception { + HttpRequest request = + HttpRequest.newBuilder() + .uri( + URI.create( + SdkClients.getServerUrl() + + "/v1/glossaryTerms/" + + termId + + "?fields=relatedTerms,glossary")) + .header("Authorization", "Bearer " + SdkClients.getAdminToken()) + .header("Accept", "application/json") + .timeout(Duration.ofSeconds(30)) + .GET() + .build(); + HttpResponse response = HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString()); + assertEquals( + 200, + response.statusCode(), + () -> "Fetch term failed: " + response.statusCode() + " " + response.body()); + return MAPPER.readTree(response.body()); + } + + private JsonNode findNode(JsonNode graph, UUID termId) { + for (JsonNode node : graph.get("nodes")) { + if (termId.toString().equals(node.path("id").asText(null))) { + return node; + } + } + return MAPPER.createObjectNode(); + } + + private void patchTerm(UUID termId, String jsonPatch) throws Exception { + HttpRequest request = + HttpRequest.newBuilder() + .uri(URI.create(SdkClients.getServerUrl() + "/v1/glossaryTerms/" + termId)) + .header("Authorization", "Bearer " + SdkClients.getAdminToken()) + .header("Content-Type", "application/json-patch+json") + .timeout(Duration.ofSeconds(30)) + .method("PATCH", HttpRequest.BodyPublishers.ofString(jsonPatch)) + .build(); + HttpResponse response = HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString()); + assertEquals( + 200, + response.statusCode(), + () -> "PATCH term failed: " + response.statusCode() + " " + response.body()); + } + + private void addRelation(UUID fromId, UUID toId, String relationType) throws Exception { + String body = + String.format( + "{\"term\":{\"id\":\"%s\",\"type\":\"glossaryTerm\"},\"relationType\":\"%s\"}", + toId, relationType); + HttpRequest request = + HttpRequest.newBuilder() + .uri( + URI.create( + SdkClients.getServerUrl() + "/v1/glossaryTerms/" + fromId + "/relations")) + .header("Authorization", "Bearer " + SdkClients.getAdminToken()) + .header("Content-Type", "application/json") + .timeout(Duration.ofSeconds(30)) + .POST(HttpRequest.BodyPublishers.ofString(body)) + .build(); + HttpResponse response = HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString()); + assertEquals( + 200, + response.statusCode(), + () -> "Add relation failed: " + response.statusCode() + " " + response.body()); + } + + private void deleteRelation(UUID fromId, UUID toId, String relationType) throws Exception { + HttpRequest request = + HttpRequest.newBuilder() + .uri( + URI.create( + SdkClients.getServerUrl() + + "/v1/glossaryTerms/" + + fromId + + "/relations/" + + toId + + "?relationType=" + + relationType)) + .header("Authorization", "Bearer " + SdkClients.getAdminToken()) + .timeout(Duration.ofSeconds(30)) + .DELETE() + .build(); + HttpResponse response = HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString()); + assertEquals( + 200, + response.statusCode(), + () -> "Delete relation failed: " + response.statusCode() + " " + response.body()); + } + + private JsonNode fetchGlossaryGraph(UUID glossaryId) throws Exception { + String baseUrl = SdkClients.getServerUrl(); + String token = SdkClients.getAdminToken(); + String url = String.format("%s/v1/rdf/glossary/graph?limit=500", baseUrl); + if (glossaryId != null) { + url = url + "&glossaryId=" + glossaryId; + } + HttpRequest request = + HttpRequest.newBuilder() + .uri(URI.create(url)) + .header("Authorization", "Bearer " + token) + .header("Accept", "application/json") + .timeout(Duration.ofSeconds(60)) + .GET() + .build(); + HttpResponse response = HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString()); + assertEquals( + 200, + response.statusCode(), + () -> "Expected 200 OK from /v1/rdf/glossary/graph; body=" + response.body()); + JsonNode body = MAPPER.readTree(response.body()); + assertNotNull(body.get("nodes"), "Response should include a nodes array"); + return body; + } + + private Set nodeIds(JsonNode graph) { + Set ids = new HashSet<>(); + for (JsonNode node : graph.get("nodes")) { + JsonNode idNode = node.get("id"); + if (idNode != null && !idNode.isNull()) { + try { + ids.add(UUID.fromString(idNode.asText())); + } catch (IllegalArgumentException ignored) { + // Non-UUID ids (e.g. glossary URIs) are not term identifiers — skip. + } + } + } + return ids; + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java index 2582558f9020..77c48d22c59e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java @@ -239,46 +239,26 @@ public void addRelationship(EntityRelationship relationship) { return; } + // Append the relationship triples directly with INSERT DATA. The previous + // implementation fetched the entity model, merged the new triple in, then + // round-tripped through storeEntity — but storeEntity performs a + // translator-scoped delete (rdf:type, rdfs:label, om:belongsToGlossary, + // and every literal) on the entity URI before loading the supplied model. + // Called with a relationship-only model that path wiped the source + // entity's identity, so subsequent SPARQL queries anchored on rdf:type or + // om:belongsToGlossary stopped finding the term. INSERT DATA is purely + // additive and matches the pattern used by addGlossaryTermRelation, which + // never had this bug. try { Model relationshipModel = createRelationshipModel(relationship); - - String fromUri = - config.getBaseUri().toString() - + "entity/" - + relationship.getFromEntity() - + "/" - + relationship.getFromId(); - String toUri = - config.getBaseUri().toString() - + "entity/" - + relationship.getToEntity() - + "/" - + relationship.getToId(); - - // Add to the entity's graph - Model fromEntityModel = - storageService.getEntity(relationship.getFromEntity(), relationship.getFromId()); - - if (fromEntityModel == null) { - // During initialization, relationships might be added before entities are created in RDF - // This is expected behavior, so we'll handle it gracefully without warnings - LOG.debug( - "Entity {} with ID {} not yet in RDF store, creating model for relationship", - relationship.getFromEntity(), - relationship.getFromId()); - fromEntityModel = ModelFactory.createDefaultModel(); - - // Add basic entity information to make the model valid - Resource entityResource = fromEntityModel.createResource(fromUri); - entityResource.addProperty( - fromEntityModel.createProperty(config.getBaseUri() + "ontology/entityType"), - relationship.getFromEntity()); + java.io.StringWriter writer = new java.io.StringWriter(); + relationshipModel.write(writer, "N-TRIPLES"); + String triples = writer.toString(); + if (triples.isBlank()) { + return; } - - fromEntityModel.add(relationshipModel); - storageService.storeEntity( - relationship.getFromEntity(), relationship.getFromId(), fromEntityModel); - + String insertQuery = "INSERT DATA { GRAPH <" + KNOWLEDGE_GRAPH + "> { " + triples + " } }"; + storageService.executeSparqlUpdate(insertQuery); LOG.debug("Added relationship {} to RDF store", relationship); } catch (Exception e) { LOG.error("Failed to add relationship to RDF", e); @@ -1204,17 +1184,41 @@ private String buildGlossaryTermGraphQuery( queryBuilder.append("PREFIX skos: "); queryBuilder.append("PREFIX prov: "); queryBuilder.append( - "SELECT DISTINCT ?term1 ?term2 ?relationType ?term1Name ?term2Name ?term1FQN ?term2FQN ?term1DisplayName ?term2DisplayName ?glossary "); + "SELECT DISTINCT ?term1 ?term2 ?relationType ?term1Name ?term2Name ?term1FQN ?term2FQN ?term1DisplayName ?term2DisplayName ?glossary ?glossaryName "); queryBuilder.append("WHERE { "); queryBuilder.append(" GRAPH ?g { "); // Note: glossaryTerm entities are typed as skos:Concept (see RdfUtils.getRdfType) queryBuilder.append(" ?term1 a skos:Concept . "); // Filter to only include glossaryTerm URIs (not tags or other skos:Concept types) queryBuilder.append(" FILTER(CONTAINS(STR(?term1), '/glossaryTerm/')) "); - queryBuilder.append(" OPTIONAL { ?term1 om:name ?term1Name } "); + // `name` is mapped to rdfs:label in base.jsonld; om:name is never written. + // Read rdfs:label so terms without a displayName still surface a real label + // instead of falling back to the entity UUID at render time. + queryBuilder.append(" OPTIONAL { ?term1 rdfs:label ?term1Name } "); queryBuilder.append(" OPTIONAL { ?term1 skos:prefLabel ?term1DisplayName } "); queryBuilder.append(" OPTIONAL { ?term1 om:fullyQualifiedName ?term1FQN } "); - queryBuilder.append(" OPTIONAL { ?term1 om:belongsTo ?glossary } "); + // When glossaryId is supplied, require the membership triple so the row is + // dropped (not just filtered) for terms outside the requested glossary. + // The predicate is om:belongsToGlossary (see governance.jsonld @context for + // GlossaryTerm.glossary); the previous om:belongsTo predicate is never + // written, which made the downstream FILTER a no-op and leaked every + // glossary's terms. + if (glossaryId != null) { + String glossaryUri = config.getBaseUri().toString() + "entity/glossary/" + glossaryId; + queryBuilder.append(" ?term1 om:belongsToGlossary <").append(glossaryUri).append("> . "); + queryBuilder.append(" BIND(<").append(glossaryUri).append("> AS ?glossary) "); + } else { + queryBuilder.append(" OPTIONAL { ?term1 om:belongsToGlossary ?glossary } "); + } + // Resolve the glossary's human label so the UI can render a group container + // even when the parent Glossary entity is not in the caller's accessible + // glossary list (otherwise it falls back to the raw UUID). The `name` + // property is mapped to rdfs:label by base.jsonld; skos:prefLabel + // (displayName) is also tried so a user-friendly label wins when present. + queryBuilder.append(" OPTIONAL { ?glossary skos:prefLabel ?glossaryDisplayName } "); + queryBuilder.append(" OPTIONAL { ?glossary rdfs:label ?glossaryRdfsLabel } "); + queryBuilder.append( + " BIND(COALESCE(?glossaryDisplayName, ?glossaryRdfsLabel) AS ?glossaryName) "); // Build relation type filter List relationPredicates = new ArrayList<>(); @@ -1254,7 +1258,7 @@ private String buildGlossaryTermGraphQuery( // Note: glossaryTerm entities are typed as skos:Concept (see RdfUtils.getRdfType) queryBuilder.append(" ?term2 a skos:Concept . "); queryBuilder.append(" FILTER(CONTAINS(STR(?term2), '/glossaryTerm/')) "); - queryBuilder.append(" OPTIONAL { ?term2 om:name ?term2Name } "); + queryBuilder.append(" OPTIONAL { ?term2 rdfs:label ?term2Name } "); queryBuilder.append(" OPTIONAL { ?term2 skos:prefLabel ?term2DisplayName } "); queryBuilder.append(" OPTIONAL { ?term2 om:fullyQualifiedName ?term2FQN } "); queryBuilder.append(" FILTER(?relationType IN ("); @@ -1262,11 +1266,8 @@ private String buildGlossaryTermGraphQuery( queryBuilder.append(")) "); queryBuilder.append(" } "); - // Filter by glossary if specified - if (glossaryId != null) { - String glossaryUri = config.getBaseUri().toString() + "entity/glossary/" + glossaryId; - queryBuilder.append(" FILTER(?glossary = <").append(glossaryUri).append(">) "); - } + // Glossary scoping is handled above by adding a required om:belongsToGlossary + // triple to ?term1 when glossaryId is non-null. queryBuilder.append(" } "); queryBuilder.append("} "); @@ -1319,6 +1320,14 @@ private String parseGlossaryTermGraphResults( Set edgeKeys = new HashSet<>(); Set termsWithRelations = new HashSet<>(); + // When scoped to a specific glossary, resolve its display label from the + // DB once and use it as a fallback for `?glossaryName`. The SPARQL + // OPTIONAL binds nothing if the parent Glossary entity hasn't been (or + // has only partially been) projected to RDF — without this fallback the + // response would omit the `group` field and the UI hierarchy view would + // render the glossary UUID instead of its name. + String scopedGlossaryName = lookupGlossaryDisplayName(glossaryId); + com.fasterxml.jackson.databind.JsonNode resultsJson = JsonUtils.readTree(sparqlResults); if (resultsJson.has("results") && resultsJson.get("results").has("bindings")) { @@ -1358,20 +1367,56 @@ private String parseGlossaryTermGraphResults( binding.has("term2FQN") && !binding.get("term2FQN").isNull() ? binding.get("term2FQN").get("value").asText() : null; + String glossaryUri = + binding.has("glossary") && !binding.get("glossary").isNull() + ? binding.get("glossary").get("value").asText() + : null; + String glossaryName = + binding.has("glossaryName") && !binding.get("glossaryName").isNull() + ? binding.get("glossaryName").get("value").asText() + : null; - // Use displayName if available, otherwise fall back to name - String term1Label = term1DisplayName != null ? term1DisplayName : term1Name; - String term2Label = term2DisplayName != null ? term2DisplayName : term2Name; + // Treat blank as missing: skos:prefLabel is materialized as an empty + // literal when the term has no displayName, and an empty string here + // would otherwise win over the real rdfs:label name and render as a + // blank node label in the UI. + String term1Label = firstNonBlank(term1DisplayName, term1Name); + String term2Label = firstNonBlank(term2DisplayName, term2Name); + glossaryName = firstNonBlank(glossaryName, scopedGlossaryName); if (term1Uri == null) continue; // Add term1 node if (!addedNodes.contains(term1Uri) && addedNodes.size() < limit) { com.fasterxml.jackson.databind.node.ObjectNode node = - createGlossaryTermNode(term1Uri, term1Label, term1FQN, term2Uri != null); + createGlossaryTermNode( + term1Uri, term1Label, term1FQN, glossaryUri, glossaryName, term2Uri != null); nodes.add(node); nodeMap.put(term1Uri, node); addedNodes.add(term1Uri); + } else if (addedNodes.contains(term1Uri)) { + // The term may have been added earlier as a `term2` (edge target) + // by a row whose `term1` was a different term; that path doesn't + // populate glossaryId / group. Now that we have a row where this + // term is the primary, backfill the membership fields so the + // hierarchy view in the UI can resolve the group container label. + com.fasterxml.jackson.databind.node.ObjectNode existing = nodeMap.get(term1Uri); + if (existing != null) { + if (!existing.has("glossaryId") && glossaryUri != null) { + existing.put("glossaryId", extractEntityIdFromUri(glossaryUri)); + } + if (!existing.has("group") && !isBlank(glossaryName)) { + existing.put("group", glossaryName); + } + // Also upgrade the label if we now have a real one (the term2 + // path falls through to UUID when neither name nor displayName + // is present in that row). + String currentLabel = existing.path("label").asText(null); + String entityId = extractEntityIdFromUri(term1Uri); + if ((currentLabel == null || currentLabel.equals(entityId)) && !isBlank(term1Label)) { + existing.put("label", term1Label); + } + } } // If there's a relation, add term2 and the edge @@ -1380,8 +1425,11 @@ private String parseGlossaryTermGraphResults( termsWithRelations.add(term2Uri); if (!addedNodes.contains(term2Uri) && addedNodes.size() < limit) { + // term2 may live in a different glossary; the SPARQL row only + // surfaces term1's glossary, so leave the membership fields empty + // for term2 rather than mis-attributing it. com.fasterxml.jackson.databind.node.ObjectNode node = - createGlossaryTermNode(term2Uri, term2Label, term2FQN, true); + createGlossaryTermNode(term2Uri, term2Label, term2FQN, null, null, true); nodes.add(node); nodeMap.put(term2Uri, node); addedNodes.add(term2Uri); @@ -1441,7 +1489,12 @@ private String parseGlossaryTermGraphResults( } private com.fasterxml.jackson.databind.node.ObjectNode createGlossaryTermNode( - String termUri, String name, String fqn, boolean hasRelations) { + String termUri, + String name, + String fqn, + String glossaryUri, + String glossaryName, + boolean hasRelations) { com.fasterxml.jackson.databind.node.ObjectNode node = JsonUtils.getObjectMapper().createObjectNode(); @@ -1452,11 +1505,56 @@ private com.fasterxml.jackson.databind.node.ObjectNode createGlossaryTermNode( if (fqn != null) { node.put("fullyQualifiedName", fqn); } + if (glossaryUri != null) { + node.put("glossaryId", extractEntityIdFromUri(glossaryUri)); + } + if (glossaryName != null) { + // Used by the UI as the hierarchy combo (group container) label so a + // glossary name is shown even when the caller cannot see the parent + // Glossary in the glossaries listing. + node.put("group", glossaryName); + } node.put("isolated", !hasRelations); return node; } + private static boolean isBlank(String s) { + return s == null || s.isBlank(); + } + + private static String firstNonBlank(String a, String b) { + if (!isBlank(a)) return a; + if (!isBlank(b)) return b; + return null; + } + + /** + * Resolve a glossary's user-facing label from the entity repository. + * Returns null if {@code glossaryId} is null, the entity is gone, or the + * lookup fails — callers should treat this as a best-effort fallback. + */ + private String lookupGlossaryDisplayName(UUID glossaryId) { + if (glossaryId == null) { + return null; + } + try { + var glossaryRepo = Entity.getEntityRepository(Entity.GLOSSARY); + var glossary = + (org.openmetadata.schema.entity.data.Glossary) + glossaryRepo.get( + null, + glossaryId, + glossaryRepo.getFields(""), + org.openmetadata.schema.type.Include.NON_DELETED, + false); + return firstNonBlank(glossary.getDisplayName(), glossary.getName()); + } catch (Exception e) { + LOG.debug("Could not resolve display name for glossary {}: {}", glossaryId, e.getMessage()); + return null; + } + } + private String formatGlossaryRelationType(String relationUri) { String relation = extractPredicateName(relationUri); return formatRelationTypeName(relation); @@ -1521,27 +1619,36 @@ private String getGlossaryTermGraphFromDatabase( JsonUtils.getObjectMapper().createArrayNode(); try { - // Get glossary terms from database - var glossaryTermRepository = Entity.getEntityRepository("glossaryTerm"); + // Get glossary terms from database. ListFilter has no first-class + // "glossary" predicate for the glossary_term_entity table, so passing + // it via addQueryParam is a silent no-op and would leak every term in + // the system. Fetch the full list and scope by glossary id in memory. + var glossaryTermRepository = Entity.getEntityRepository(Entity.GLOSSARY_TERM); var listFilter = new org.openmetadata.service.jdbi3.ListFilter(null); - - if (glossaryId != null) { - listFilter.addQueryParam("glossary", glossaryId.toString()); - } - - var terms = + var fetched = glossaryTermRepository.listAll( glossaryTermRepository.getFields("relatedTerms,parent,children"), listFilter); + var terms = new ArrayList(); + String wantedGlossaryId = glossaryId != null ? glossaryId.toString() : null; + for (var entity : fetched) { + var term = (org.openmetadata.schema.entity.data.GlossaryTerm) entity; + if (wantedGlossaryId != null + && (term.getGlossary() == null + || term.getGlossary().getId() == null + || !wantedGlossaryId.equals(term.getGlossary().getId().toString()))) { + continue; + } + terms.add(term); + } Set addedNodes = new HashSet<>(); Set termsWithRelations = new HashSet<>(); Set edgeKeys = new HashSet<>(); int count = 0; - for (var entity : terms) { + for (var term : terms) { if (count >= limit) break; - var term = (org.openmetadata.schema.entity.data.GlossaryTerm) entity; String termId = term.getId().toString(); boolean hasRelations = @@ -2628,10 +2735,23 @@ public void removeGlossaryTermRelation(UUID fromTermId, UUID toTermId, String re String toUri = config.getBaseUri().toString() + "entity/glossaryTerm/" + toTermId; String predicateUri = getGlossaryTermRelationPredicateUri(relationType); + // Delete BOTH directions. The add path runs through + // EntityRepository.addRelationship which writes the reverse direction + // for bidirectional relationships, so a one-sided delete leaves a + // stale " om: " triple — visible as a lingering + // edge in the relations graph after the user removed the relation. String sparqlUpdate = String.format( - "DELETE WHERE { GRAPH <%s> { <%s> <%s> <%s> } }", - KNOWLEDGE_GRAPH, fromUri, predicateUri, toUri); + "DELETE WHERE { GRAPH <%s> { <%s> <%s> <%s> } };" + + "DELETE WHERE { GRAPH <%s> { <%s> <%s> <%s> } }", + KNOWLEDGE_GRAPH, + fromUri, + predicateUri, + toUri, + KNOWLEDGE_GRAPH, + toUri, + predicateUri, + fromUri); storageService.executeSparqlUpdate(sparqlUpdate); LOG.debug("Removed glossary term relation {} -> {} ({})", fromTermId, toTermId, relationType); @@ -2977,7 +3097,7 @@ public String exportGlossaryAsOntology(UUID glossaryId, String format, boolean i glossaryResource.addProperty(skosDefinition, glossary.getDescription()); } - var glossaryTermRepository = Entity.getEntityRepository("glossaryTerm"); + var glossaryTermRepository = Entity.getEntityRepository(Entity.GLOSSARY_TERM); var listFilter = new org.openmetadata.service.jdbi3.ListFilter(null); listFilter.addQueryParam("glossary", glossaryId.toString()); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfUpdater.java b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfUpdater.java index d491cc6fb639..64018ca3db3f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfUpdater.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfUpdater.java @@ -8,6 +8,8 @@ import org.openmetadata.schema.api.configuration.rdf.RdfConfiguration; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.EntityRelationship; +import org.openmetadata.schema.type.Relationship; +import org.openmetadata.service.Entity; import org.openmetadata.service.monitoring.RequestLatencyContext; import org.openmetadata.service.util.AsyncService; @@ -72,6 +74,15 @@ public static void addRelationship(EntityRelationship relationship) { if (rdfRepository == null || !rdfRepository.isEnabled()) { return; } + if (isGlossaryTermRelatedTo(relationship)) { + // Glossary term ⇔ glossary term RELATED_TO is owned by the typed path + // (addGlossaryTermRelation), which writes the precise predicate — + // skos:exactMatch for synonym, skos:broader for broader, om:relatedTo + // for relatedTo, etc. The generic addRelationship would unconditionally + // write om:relatedTo on top of that, so every type change would leak a + // residual om:relatedTo triple that nothing later cleans up. + return; + } submitAsync( "addRelationship", () -> { @@ -90,6 +101,11 @@ public static void removeRelationship(EntityRelationship relationship) { if (rdfRepository == null || !rdfRepository.isEnabled()) { return; } + if (isGlossaryTermRelatedTo(relationship)) { + // See addRelationship — the typed removal path + // (removeGlossaryTermRelation) owns these deletions. + return; + } submitAsync( "removeRelationship", () -> { @@ -104,6 +120,12 @@ public static void removeRelationship(EntityRelationship relationship) { }); } + private static boolean isGlossaryTermRelatedTo(EntityRelationship relationship) { + return Entity.GLOSSARY_TERM.equals(relationship.getFromEntity()) + && Entity.GLOSSARY_TERM.equals(relationship.getToEntity()) + && relationship.getRelationshipType() == Relationship.RELATED_TO; + } + public static boolean isEnabled() { return rdfRepository != null && rdfRepository.isEnabled(); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/translator/RdfPropertyMapper.java b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/translator/RdfPropertyMapper.java index afa85192a0d8..2eeeeee033b5 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/translator/RdfPropertyMapper.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/translator/RdfPropertyMapper.java @@ -1128,7 +1128,17 @@ private void addTypedProperty( XSDDatatype datatype = getXSDDatatype(xsdType); if (datatype != null && !value.isNull()) { - resource.addProperty(property, model.createTypedLiteral(value.asText(), datatype)); + String literal = value.asText(); + // Skip blank xsd:string triples. An empty literal carries no real + // information and downstream readers had to special-case it — most + // visibly skos:prefLabel="" winning over rdfs:label in the glossary + // term graph SPARQL. By not writing the triple at all, OPTIONAL + // patterns and COALESCE on the read side behave correctly with no + // extra logic. + if (XSDDatatype.XSDstring.equals(datatype) && literal.isBlank()) { + return; + } + resource.addProperty(property, model.createTypedLiteral(literal, datatype)); } } else { addSimpleProperty(resource, propertyId, value, model); diff --git a/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.ts b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.ts index 292ebe86149a..c174549a8ceb 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.ts +++ b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.ts @@ -121,8 +121,12 @@ export function convertRdfGraphToOntologyGraph( }); const nodes: OntologyNode[] = rdfData.nodes.map((node) => { - let glossaryId: string | undefined; - if (node.group) { + // Prefer the explicit glossaryId from the RDF endpoint — it survives + // glossary rename / display-name drift better than the FQN-prefix + // heuristic. Fall back to looking up the glossary by `group` (display + // name) or the FQN's first segment for backwards-compatible payloads. + let glossaryId: string | undefined = node.glossaryId; + if (!glossaryId && node.group) { glossaryId = glossaryNameToId.get(node.group.toLowerCase()); } if (!glossaryId && node.fullyQualifiedName) { diff --git a/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.ts b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.ts index bf3f0cec64d9..5e12fdf43dba 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.ts +++ b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.ts @@ -247,7 +247,15 @@ export function buildHierarchyGraphs({ } const comboId = `hierarchy-combo-${glossaryId}`; - const comboLabel = glossaryNames[glossaryId] ?? glossaryId; + // Prefer the glossary's name from the caller's visible glossary list, but + // fall back to a `group` value carried on any term node (the RDF endpoint + // populates this from the glossary's om:name) so callers who can see the + // term but not the parent glossary still see a human label instead of the + // raw UUID. + const groupFallback = comboNodes + .map((n) => n.group) + .find((g): g is string => typeof g === 'string' && g.length > 0); + const comboLabel = glossaryNames[glossaryId] ?? groupFallback ?? glossaryId; combos.push({ id: comboId, label: comboLabel, glossaryId }); nodesToShow.forEach((n) => nodes.push(n)); keptEdges.forEach((e) => diff --git a/openmetadata-ui/src/main/resources/ui/src/rest/rdfAPI.interface.ts b/openmetadata-ui/src/main/resources/ui/src/rest/rdfAPI.interface.ts index cfeecc0771ab..116c3b391f52 100644 --- a/openmetadata-ui/src/main/resources/ui/src/rest/rdfAPI.interface.ts +++ b/openmetadata-ui/src/main/resources/ui/src/rest/rdfAPI.interface.ts @@ -16,7 +16,13 @@ export interface GraphNode { id: string; label: string; type: string; + // Human label of the parent glossary (set by the RDF endpoint so the UI + // hierarchy view can show a group name even when the parent Glossary is + // not in the caller's accessible glossary list). group?: string; + // UUID of the parent glossary; supplied by the RDF endpoint when the + // term-to-glossary membership triple is available. + glossaryId?: string; title?: string; fullyQualifiedName?: string; description?: string; From e5b8b512fa2a9d6b0ed5a25e1353d9466973d4d0 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 16 May 2026 20:21:54 -0700 Subject: [PATCH 2/4] fix(rdf): scope DB fallback via fqnHash prefix instead of full table scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR #28199 review: getGlossaryTermGraphFromDatabase previously loaded every glossary term in the deployment with listAll() and then filtered by glossary.id in a loop. That defeats the point of scoping and creates memory / GC pressure on every fallback call. Now drive the scoping at the DB level by calling glossaryTermDAO.getNestedTerms(glossaryFqn), which executes `SELECT json FROM glossary_term_entity WHERE fqnHash LIKE :prefix` — an indexed prefix scan that only returns the requested glossary's terms. Parse the result JSON for ids, then bulk-hydrate relatedTerms / parent / children via EntityRepository.get(uri, ids, fields, include) in one batched call. The unscoped branch (glossaryId == null) still uses listAll because the endpoint is documented to support that case; the regression was strictly about silently doing a full scan when the caller DID supply a scope. All 9 RdfGlossaryGraphIT tests remain green. --- .../service/rdf/RdfRepository.java | 68 +++++++++++++------ 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java index 77c48d22c59e..1b306341aa7d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java @@ -1619,26 +1619,56 @@ private String getGlossaryTermGraphFromDatabase( JsonUtils.getObjectMapper().createArrayNode(); try { - // Get glossary terms from database. ListFilter has no first-class - // "glossary" predicate for the glossary_term_entity table, so passing - // it via addQueryParam is a silent no-op and would leak every term in - // the system. Fetch the full list and scope by glossary id in memory. - var glossaryTermRepository = Entity.getEntityRepository(Entity.GLOSSARY_TERM); - var listFilter = new org.openmetadata.service.jdbi3.ListFilter(null); - var fetched = - glossaryTermRepository.listAll( - glossaryTermRepository.getFields("relatedTerms,parent,children"), listFilter); - var terms = new ArrayList(); - String wantedGlossaryId = glossaryId != null ? glossaryId.toString() : null; - for (var entity : fetched) { - var term = (org.openmetadata.schema.entity.data.GlossaryTerm) entity; - if (wantedGlossaryId != null - && (term.getGlossary() == null - || term.getGlossary().getId() == null - || !wantedGlossaryId.equals(term.getGlossary().getId().toString()))) { - continue; + // Resolve the glossary terms with DB-level scoping. ListFilter has no + // first-class "glossary" predicate for the glossary_term_entity table, + // so the previous addQueryParam("glossary", …) was a silent no-op that + // leaked every term in the deployment and then filtered them in + // memory. The fqnHash LIKE '.%' query in + // glossaryTermDAO.getNestedTerms scopes at the database, then we + // batch-hydrate the related fields the parsing loop needs. + var glossaryTermRepository = + (org.openmetadata.service.jdbi3.GlossaryTermRepository) + Entity.getEntityRepository(Entity.GLOSSARY_TERM); + List terms; + if (glossaryId != null) { + var glossaryRepo = Entity.getEntityRepository(Entity.GLOSSARY); + var glossary = + (org.openmetadata.schema.entity.data.Glossary) + glossaryRepo.get( + null, + glossaryId, + glossaryRepo.getFields(""), + org.openmetadata.schema.type.Include.NON_DELETED, + false); + List nestedJsons = + Entity.getCollectionDAO() + .glossaryTermDAO() + .getNestedTerms(glossary.getFullyQualifiedName()); + List ids = new ArrayList<>(nestedJsons.size()); + for (String json : nestedJsons) { + var stub = + JsonUtils.readValue(json, org.openmetadata.schema.entity.data.GlossaryTerm.class); + if (stub.getId() != null) { + ids.add(stub.getId()); + } + } + terms = + ids.isEmpty() + ? new ArrayList<>() + : glossaryTermRepository.get( + null, + ids, + glossaryTermRepository.getFields("relatedTerms,parent,children"), + org.openmetadata.schema.type.Include.NON_DELETED); + } else { + var listFilter = new org.openmetadata.service.jdbi3.ListFilter(null); + var fetched = + glossaryTermRepository.listAll( + glossaryTermRepository.getFields("relatedTerms,parent,children"), listFilter); + terms = new ArrayList<>(fetched.size()); + for (var entity : fetched) { + terms.add((org.openmetadata.schema.entity.data.GlossaryTerm) entity); } - terms.add(term); } Set addedNodes = new HashSet<>(); From e181f948eebe88431907cd596f50a42560678474 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 16 May 2026 20:39:42 -0700 Subject: [PATCH 3/4] refactor(rdf): reuse the /v1/glossaryTerms listing path + shorten imports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review on PR #28199: 1. Drive the DB fallback through the SAME code path as `GET /v1/glossaryTerms?glossary=` instead of a parallel getNestedTerms + manual hydration sequence. That endpoint sets `filter.addQueryParam("parent", glossaryFqn)`, and `ListFilter.getParentCondition` turns it into a fqnHash LIKE prefix predicate via `getFqnPrefixCondition` — the same indexed prefix scan, but routed through `listAll` so cursor pagination, field hydration, and bulk relationship fetching all come for free. Net result: one DB-scoped read path used by both the public listing endpoint and the RDF graph DB fallback. 2. Replace fully-qualified type names introduced in the earlier commit (`org.openmetadata.schema.entity.data.GlossaryTerm`, `…Glossary`, `…Include.NON_DELETED`, `org.openmetadata.service.jdbi3.GlossaryTermRepository`, `…ListFilter`) with proper imports. Also collapsed the pre-existing `Relationship` FQN at line 348 since the new import made it redundant. All 9 RdfGlossaryGraphIT tests remain green. --- .../service/rdf/RdfRepository.java | 91 +++++++------------ 1 file changed, 33 insertions(+), 58 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java index 1b306341aa7d..67781b728534 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/rdf/RdfRepository.java @@ -23,12 +23,18 @@ import org.openmetadata.schema.api.configuration.rdf.RdfConfiguration; import org.openmetadata.schema.configuration.GlossaryTermRelationSettings; import org.openmetadata.schema.configuration.RelationCardinality; +import org.openmetadata.schema.entity.data.Glossary; +import org.openmetadata.schema.entity.data.GlossaryTerm; import org.openmetadata.schema.settings.SettingsType; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.EntityRelationship; +import org.openmetadata.schema.type.Include; +import org.openmetadata.schema.type.Relationship; import org.openmetadata.schema.utils.JsonUtils; import org.openmetadata.service.Entity; import org.openmetadata.service.exception.EntityNotFoundException; +import org.openmetadata.service.jdbi3.GlossaryTermRepository; +import org.openmetadata.service.jdbi3.ListFilter; import org.openmetadata.service.rdf.storage.RdfStorageFactory; import org.openmetadata.service.rdf.storage.RdfStorageInterface; import org.openmetadata.service.rdf.translator.JsonLdTranslator; @@ -339,8 +345,7 @@ static String getRelationshipPredicateUri(String relationshipType) { private static Set computeRelationshipHookPredicates() { Set predicates = new LinkedHashSet<>(); - for (org.openmetadata.schema.type.Relationship rel : - org.openmetadata.schema.type.Relationship.values()) { + for (Relationship rel : Relationship.values()) { String value = rel.value(); // Lineage is owned by addLineageWithDetails — its DELETE is scoped to // the lineageDetails sub-resource, not the relationship hook layer. @@ -1541,13 +1546,9 @@ private String lookupGlossaryDisplayName(UUID glossaryId) { try { var glossaryRepo = Entity.getEntityRepository(Entity.GLOSSARY); var glossary = - (org.openmetadata.schema.entity.data.Glossary) + (Glossary) glossaryRepo.get( - null, - glossaryId, - glossaryRepo.getFields(""), - org.openmetadata.schema.type.Include.NON_DELETED, - false); + null, glossaryId, glossaryRepo.getFields(""), Include.NON_DELETED, false); return firstNonBlank(glossary.getDisplayName(), glossary.getName()); } catch (Exception e) { LOG.debug("Could not resolve display name for glossary {}: {}", glossaryId, e.getMessage()); @@ -1619,56 +1620,31 @@ private String getGlossaryTermGraphFromDatabase( JsonUtils.getObjectMapper().createArrayNode(); try { - // Resolve the glossary terms with DB-level scoping. ListFilter has no - // first-class "glossary" predicate for the glossary_term_entity table, - // so the previous addQueryParam("glossary", …) was a silent no-op that - // leaked every term in the deployment and then filtered them in - // memory. The fqnHash LIKE '.%' query in - // glossaryTermDAO.getNestedTerms scopes at the database, then we - // batch-hydrate the related fields the parsing loop needs. + // Reuse the exact code path the /v1/glossaryTerms?glossary= listing + // takes: resolve the glossary's FQN, then drive listAfter with the + // `parent` filter. ListFilter.getParentCondition translates that into a + // fqnHash LIKE '.%' predicate (see + // ListFilter.getFqnPrefixCondition) which is an indexed prefix scan + // scoped to that glossary — never the full table. The previous + // implementation called listAll() and filtered by glossary.id in a Java + // loop, which loaded every term in the deployment into memory. var glossaryTermRepository = - (org.openmetadata.service.jdbi3.GlossaryTermRepository) - Entity.getEntityRepository(Entity.GLOSSARY_TERM); - List terms; + (GlossaryTermRepository) Entity.getEntityRepository(Entity.GLOSSARY_TERM); + var listFilter = new ListFilter(null); if (glossaryId != null) { var glossaryRepo = Entity.getEntityRepository(Entity.GLOSSARY); var glossary = - (org.openmetadata.schema.entity.data.Glossary) + (Glossary) glossaryRepo.get( - null, - glossaryId, - glossaryRepo.getFields(""), - org.openmetadata.schema.type.Include.NON_DELETED, - false); - List nestedJsons = - Entity.getCollectionDAO() - .glossaryTermDAO() - .getNestedTerms(glossary.getFullyQualifiedName()); - List ids = new ArrayList<>(nestedJsons.size()); - for (String json : nestedJsons) { - var stub = - JsonUtils.readValue(json, org.openmetadata.schema.entity.data.GlossaryTerm.class); - if (stub.getId() != null) { - ids.add(stub.getId()); - } - } - terms = - ids.isEmpty() - ? new ArrayList<>() - : glossaryTermRepository.get( - null, - ids, - glossaryTermRepository.getFields("relatedTerms,parent,children"), - org.openmetadata.schema.type.Include.NON_DELETED); - } else { - var listFilter = new org.openmetadata.service.jdbi3.ListFilter(null); - var fetched = - glossaryTermRepository.listAll( - glossaryTermRepository.getFields("relatedTerms,parent,children"), listFilter); - terms = new ArrayList<>(fetched.size()); - for (var entity : fetched) { - terms.add((org.openmetadata.schema.entity.data.GlossaryTerm) entity); - } + null, glossaryId, glossaryRepo.getFields(""), Include.NON_DELETED, false); + listFilter.addQueryParam("parent", glossary.getFullyQualifiedName()); + } + List terms = new ArrayList<>(); + var fetched = + glossaryTermRepository.listAll( + glossaryTermRepository.getFields("relatedTerms,parent,children"), listFilter); + for (var entity : fetched) { + terms.add((GlossaryTerm) entity); } Set addedNodes = new HashSet<>(); @@ -3114,8 +3090,7 @@ public String exportGlossaryAsOntology(UUID glossaryId, String format, boolean i Property rdfsLabel = model.createProperty("http://www.w3.org/2000/01/rdf-schema#", "label"); try { - org.openmetadata.schema.entity.data.Glossary glossary = - Entity.getEntity("glossary", glossaryId, "*", null); + Glossary glossary = Entity.getEntity("glossary", glossaryId, "*", null); String glossaryUri = config.getBaseUri().toString() + "glossary/" + glossaryId; Resource glossaryResource = model.createResource(glossaryUri); @@ -3128,7 +3103,7 @@ public String exportGlossaryAsOntology(UUID glossaryId, String format, boolean i } var glossaryTermRepository = Entity.getEntityRepository(Entity.GLOSSARY_TERM); - var listFilter = new org.openmetadata.service.jdbi3.ListFilter(null); + var listFilter = new ListFilter(null); listFilter.addQueryParam("glossary", glossaryId.toString()); var terms = @@ -3139,7 +3114,7 @@ public String exportGlossaryAsOntology(UUID glossaryId, String format, boolean i Map termResources = new HashMap<>(); for (var entity : terms) { - var term = (org.openmetadata.schema.entity.data.GlossaryTerm) entity; + var term = (GlossaryTerm) entity; String termUri = config.getBaseUri().toString() + "glossaryTerm/" + term.getId(); Resource termResource = model.createResource(termUri); @@ -3169,7 +3144,7 @@ public String exportGlossaryAsOntology(UUID glossaryId, String format, boolean i if (includeRelations) { for (var entity : terms) { - var term = (org.openmetadata.schema.entity.data.GlossaryTerm) entity; + var term = (GlossaryTerm) entity; Resource termResource = termResources.get(term.getId()); if (term.getParent() != null && term.getParent().getId() != null) { From 312d3a6099e31d016dcca3e67d0326c897c77fd4 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 16 May 2026 21:55:17 -0700 Subject: [PATCH 4/4] test(rdf): fill coverage gaps surfaced during PR review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RdfPropertyMapperTest.AddTypedPropertyBlankString — direct unit test for the blank xsd:string skip in addTypedProperty. Covers blank, all- whitespace, populated, and the negative case (skip is intentionally scoped to xsd:string so "0" / xsd:integer literals still get emitted). - RdfUpdaterTest (new) — verifies the glossaryTerm⇔glossaryTerm RELATED_TO short-circuit on both addRelationship and removeRelationship: short-circuit shape never reaches the repository, while cross-entity RELATED_TO (table→glossaryTerm) and other relationship types (CONTAINS between glossary terms) still flow through. Uses reflection to inject a Mockito mock + Awaitility to bridge the async submit path. - graphBuilders.test.ts (new) — convertRdfGraphToOntologyGraph prefers the explicit node.glossaryId from the RDF response over the FQN-prefix heuristic; falls back to node.group then FQN; preserves group passthrough so the combo can fall back to it; replaces a UUID-shaped label with the last FQN segment. - hierarchyGraphBuilder.test.ts (new) — combo label resolution order: glossaryNames[id] > non-blank node.group > raw glossaryId. Covers the missing-from-glossaryNames case that previously rendered as a raw UUID in the UI hierarchy view. All 41 new + existing tests across both modules pass. --- .../service/rdf/RdfPropertyMapperTest.java | 79 ++++++++ .../service/rdf/RdfUpdaterTest.java | 173 ++++++++++++++++++ .../utils/graphBuilders.test.ts | 148 +++++++++++++++ .../utils/hierarchyGraphBuilder.test.ts | 103 +++++++++++ 4 files changed, 503 insertions(+) create mode 100644 openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfUpdaterTest.java create mode 100644 openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.test.ts create mode 100644 openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.test.ts diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfPropertyMapperTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfPropertyMapperTest.java index 52dbdad2df75..49441df3b7a3 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfPropertyMapperTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfPropertyMapperTest.java @@ -1065,6 +1065,85 @@ void testHelperMethodsResolveNamespacesContextsAndDatatypes() throws Exception { } } + @Nested + @DisplayName("addTypedProperty: blank xsd:string skip") + class AddTypedPropertyBlankString { + + @Test + @DisplayName("Blank xsd:string value should not produce a literal triple") + void blankStringIsNotEmitted() throws Exception { + JsonNode blank = objectMapper.getNodeFactory().textNode(""); + invokePrivate( + "addTypedProperty", + new Class[] {Resource.class, String.class, JsonNode.class, String.class, Model.class}, + entityResource, + "skos:prefLabel", + blank, + "xsd:string", + model); + + Property pref = model.createProperty(SKOS.getURI(), "prefLabel"); + assertFalse( + model.contains(entityResource, pref), + "Blank xsd:string literals must not be emitted — they masked rdfs:label " + + "on the read side and rendered as empty UI labels"); + } + + @Test + @DisplayName("Whitespace-only xsd:string value should not produce a literal triple") + void whitespaceOnlyStringIsNotEmitted() throws Exception { + JsonNode whitespace = objectMapper.getNodeFactory().textNode(" "); + invokePrivate( + "addTypedProperty", + new Class[] {Resource.class, String.class, JsonNode.class, String.class, Model.class}, + entityResource, + "skos:prefLabel", + whitespace, + "xsd:string", + model); + + Property pref = model.createProperty(SKOS.getURI(), "prefLabel"); + assertFalse(model.contains(entityResource, pref)); + } + + @Test + @DisplayName("Non-blank xsd:string value should still be emitted") + void nonBlankStringIsEmitted() throws Exception { + JsonNode value = objectMapper.getNodeFactory().textNode("Pretty Name"); + invokePrivate( + "addTypedProperty", + new Class[] {Resource.class, String.class, JsonNode.class, String.class, Model.class}, + entityResource, + "skos:prefLabel", + value, + "xsd:string", + model); + + Property pref = model.createProperty(SKOS.getURI(), "prefLabel"); + assertTrue(model.contains(entityResource, pref, "Pretty Name")); + } + + @Test + @DisplayName("Blank value with a non-xsd:string type should still be emitted") + void blankNonStringIsEmitted() throws Exception { + // Non-string xsd types (numbers, booleans, dates) get their own validation + // path elsewhere — the skip is intentionally narrow to xsd:string so it + // doesn't accidentally drop "0" literals or similar. + JsonNode zero = objectMapper.getNodeFactory().textNode("0"); + invokePrivate( + "addTypedProperty", + new Class[] {Resource.class, String.class, JsonNode.class, String.class, Model.class}, + entityResource, + "om:counter", + zero, + "xsd:integer", + model); + + Property counter = model.createProperty(OM_NS, "counter"); + assertTrue(model.contains(entityResource, counter)); + } + } + private Object invokePrivate(String name, Class[] parameterTypes, Object... args) throws Exception { java.lang.reflect.Method method = diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfUpdaterTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfUpdaterTest.java new file mode 100644 index 000000000000..7649b1b441b0 --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/rdf/RdfUpdaterTest.java @@ -0,0 +1,173 @@ +/* + * Copyright 2025 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.service.rdf; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.lang.reflect.Field; +import java.time.Duration; +import java.util.UUID; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.openmetadata.schema.type.EntityRelationship; +import org.openmetadata.schema.type.Relationship; +import org.openmetadata.service.Entity; + +/** + * Unit tests for {@link RdfUpdater}, specifically the glossary-term ⇔ + * glossary-term {@code RELATED_TO} short-circuit. The generic relationship + * hooks unconditionally wrote {@code om:relatedTo} on top of the typed + * predicate ({@code skos:exactMatch}, {@code skos:broader}, …) emitted by + * {@link RdfRepository#addGlossaryTermRelation}, leaving a residual edge + * after a user changed the relation type from "relatedTo" to "broader". + * Verifies the short-circuit fires for the targeted shape and only for that + * shape, so other relationships (CONTAINS, OWNS, cross-entity RELATED_TO, + * etc.) still flow through the underlying repository. + */ +class RdfUpdaterTest { + + private RdfRepository originalRepository; + private RdfRepository mockRepository; + + @BeforeEach + void setUp() throws Exception { + mockRepository = Mockito.mock(RdfRepository.class); + when(mockRepository.isEnabled()).thenReturn(true); + originalRepository = swapRdfRepository(mockRepository); + } + + @AfterEach + void tearDown() throws Exception { + swapRdfRepository(originalRepository); + } + + @Nested + @DisplayName("addRelationship short-circuits glossaryTerm⇔glossaryTerm RELATED_TO") + class AddRelationship { + + @Test + @DisplayName("glossaryTerm RELATED_TO glossaryTerm should NOT reach the repository") + void glossaryTermRelatedToIsShortCircuited() { + EntityRelationship rel = + new EntityRelationship() + .withFromId(UUID.randomUUID()) + .withToId(UUID.randomUUID()) + .withFromEntity(Entity.GLOSSARY_TERM) + .withToEntity(Entity.GLOSSARY_TERM) + .withRelationshipType(Relationship.RELATED_TO); + + RdfUpdater.addRelationship(rel); + + verify(mockRepository, never()).addRelationship(any()); + } + + @Test + @DisplayName("Cross-entity RELATED_TO (e.g. table → glossaryTerm) still flows through") + void crossEntityRelatedToIsNotShortCircuited() { + EntityRelationship rel = + new EntityRelationship() + .withFromId(UUID.randomUUID()) + .withToId(UUID.randomUUID()) + .withFromEntity(Entity.TABLE) + .withToEntity(Entity.GLOSSARY_TERM) + .withRelationshipType(Relationship.RELATED_TO); + + RdfUpdater.addRelationship(rel); + + Awaitility.await() + .atMost(Duration.ofSeconds(5)) + .untilAsserted(() -> verify(mockRepository, times(1)).addRelationship(rel)); + } + + @Test + @DisplayName("Non-RELATED_TO between two glossary terms still flows through") + void otherRelationshipBetweenGlossaryTermsIsNotShortCircuited() { + EntityRelationship rel = + new EntityRelationship() + .withFromId(UUID.randomUUID()) + .withToId(UUID.randomUUID()) + .withFromEntity(Entity.GLOSSARY_TERM) + .withToEntity(Entity.GLOSSARY_TERM) + .withRelationshipType(Relationship.CONTAINS); + + RdfUpdater.addRelationship(rel); + + Awaitility.await() + .atMost(Duration.ofSeconds(5)) + .untilAsserted(() -> verify(mockRepository, times(1)).addRelationship(rel)); + } + } + + @Nested + @DisplayName("removeRelationship short-circuits glossaryTerm⇔glossaryTerm RELATED_TO") + class RemoveRelationship { + + @Test + @DisplayName("glossaryTerm RELATED_TO glossaryTerm should NOT reach the repository") + void glossaryTermRelatedToIsShortCircuited() { + EntityRelationship rel = + new EntityRelationship() + .withFromId(UUID.randomUUID()) + .withToId(UUID.randomUUID()) + .withFromEntity(Entity.GLOSSARY_TERM) + .withToEntity(Entity.GLOSSARY_TERM) + .withRelationshipType(Relationship.RELATED_TO); + + RdfUpdater.removeRelationship(rel); + + verify(mockRepository, never()).removeRelationship(any()); + } + + @Test + @DisplayName("Cross-entity RELATED_TO still flows through to repository") + void crossEntityRelatedToIsNotShortCircuited() { + EntityRelationship rel = + new EntityRelationship() + .withFromId(UUID.randomUUID()) + .withToId(UUID.randomUUID()) + .withFromEntity(Entity.TABLE) + .withToEntity(Entity.GLOSSARY_TERM) + .withRelationshipType(Relationship.RELATED_TO); + + RdfUpdater.removeRelationship(rel); + + Awaitility.await() + .atMost(Duration.ofSeconds(5)) + .untilAsserted(() -> verify(mockRepository, times(1)).removeRelationship(rel)); + } + } + + /** + * Replace the private static {@code rdfRepository} field via reflection + * and return the previous value so tests can restore it. Required because + * RdfUpdater intentionally exposes no setter — the singleton is wired + * via {@link RdfUpdater#initialize(org.openmetadata.schema.api.configuration.rdf.RdfConfiguration)} + * which would actually connect to Fuseki. + */ + private static RdfRepository swapRdfRepository(RdfRepository replacement) throws Exception { + Field field = RdfUpdater.class.getDeclaredField("rdfRepository"); + field.setAccessible(true); + RdfRepository previous = (RdfRepository) field.get(null); + field.set(null, replacement); + return previous; + } +} diff --git a/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.test.ts b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.test.ts new file mode 100644 index 000000000000..956ce536c433 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/graphBuilders.test.ts @@ -0,0 +1,148 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import { Glossary } from '../../../generated/entity/data/glossary'; +import { GraphData } from '../../../rest/rdfAPI.interface'; +import { convertRdfGraphToOntologyGraph } from './graphBuilders'; + +const glossaries: Glossary[] = [ + { + id: 'gloss-finance-id', + name: 'Finance', + fullyQualifiedName: 'Finance', + description: 'd', + } as Glossary, + { + id: 'gloss-renamed-id', + name: 'NewName', + // The RDF response carries the OLD FQN of a renamed glossary in + // node.fullyQualifiedName until the RDF projection catches up. The + // FQN-prefix heuristic would resolve "OldName" → undefined, but the + // explicit glossaryId on the node should still bind correctly. + fullyQualifiedName: 'NewName', + description: 'd', + } as Glossary, +]; + +describe('convertRdfGraphToOntologyGraph', () => { + it('prefers the explicit glossaryId from the response over FQN heuristic', () => { + const rdf: GraphData = { + nodes: [ + { + id: 'term-1', + label: 'Revenue', + type: 'glossaryTerm', + glossaryId: 'gloss-renamed-id', + // Drift: old FQN no longer matches the renamed glossary; the + // explicit glossaryId on the node MUST win. + fullyQualifiedName: 'OldName.Revenue', + }, + ], + edges: [], + }; + + const result = convertRdfGraphToOntologyGraph(rdf, glossaries); + + expect(result.nodes[0].glossaryId).toBe('gloss-renamed-id'); + }); + + it('falls back to FQN-prefix lookup when glossaryId is not on the node', () => { + const rdf: GraphData = { + nodes: [ + { + id: 'term-1', + label: 'Revenue', + type: 'glossaryTerm', + fullyQualifiedName: 'Finance.Revenue', + }, + ], + edges: [], + }; + + const result = convertRdfGraphToOntologyGraph(rdf, glossaries); + + expect(result.nodes[0].glossaryId).toBe('gloss-finance-id'); + }); + + it('falls back to node.group when no glossaryId and FQN does not match', () => { + const rdf: GraphData = { + nodes: [ + { + id: 'term-1', + label: 'Revenue', + type: 'glossaryTerm', + group: 'Finance', + }, + ], + edges: [], + }; + + const result = convertRdfGraphToOntologyGraph(rdf, glossaries); + + expect(result.nodes[0].glossaryId).toBe('gloss-finance-id'); + }); + + it('leaves glossaryId undefined when nothing resolves', () => { + const rdf: GraphData = { + nodes: [ + { + id: 'term-1', + label: 'Unknown', + type: 'glossaryTerm', + fullyQualifiedName: 'NonExistent.Term', + }, + ], + edges: [], + }; + + const result = convertRdfGraphToOntologyGraph(rdf, glossaries); + + expect(result.nodes[0].glossaryId).toBeUndefined(); + }); + + it('keeps the node group passthrough so the combo can fall back to it', () => { + const rdf: GraphData = { + nodes: [ + { + id: 'term-1', + label: 'Revenue', + type: 'glossaryTerm', + glossaryId: 'gloss-finance-id', + group: 'Finance', + }, + ], + edges: [], + }; + + const result = convertRdfGraphToOntologyGraph(rdf, glossaries); + + expect(result.nodes[0].group).toBe('Finance'); + }); + + it('replaces a UUID-shaped label with the last FQN segment', () => { + const rdf: GraphData = { + nodes: [ + { + id: 'term-1', + label: '12345678-1234-1234-1234-123456789012', + type: 'glossaryTerm', + fullyQualifiedName: 'Finance.Revenue', + }, + ], + edges: [], + }; + + const result = convertRdfGraphToOntologyGraph(rdf, glossaries); + + expect(result.nodes[0].label).toBe('Revenue'); + }); +}); diff --git a/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.test.ts b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.test.ts new file mode 100644 index 000000000000..19cbc10a568a --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/OntologyExplorer/utils/hierarchyGraphBuilder.test.ts @@ -0,0 +1,103 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import { OntologyEdge, OntologyNode } from '../OntologyExplorer.interface'; +import { buildHierarchyGraphs } from './hierarchyGraphBuilder'; + +const RELATION_COLORS = { broader: '#000' }; + +function term(id: string, glossaryId: string, group?: string): OntologyNode { + return { + id, + label: id, + type: 'glossaryTerm', + glossaryId, + group, + }; +} + +function edge(from: string, to: string): OntologyEdge { + return { from, to, label: 'Broader', relationType: 'broader' }; +} + +describe('buildHierarchyGraphs combo label resolution', () => { + const parent = term('parent', 'gloss-id', 'Pharmaceuticals'); + const child = term('child', 'gloss-id', 'Pharmaceuticals'); + const terms = [parent, child]; + const relations = [edge('parent', 'child')]; + + it('uses the glossary name from glossaryNames when available', () => { + const result = buildHierarchyGraphs({ + terms, + relations, + relationSettings: null, + relationColors: RELATION_COLORS, + glossaryNames: { 'gloss-id': 'Pharmaceuticals' }, + }); + + expect(result.combos).toHaveLength(1); + expect(result.combos[0].label).toBe('Pharmaceuticals'); + }); + + it('falls back to node.group when glossaryNames lookup misses', () => { + // Simulates the prod scenario where a term belongs to a glossary the + // caller cannot see (permission gap, RDF-vs-DB inconsistency, etc.): + // the per-term `group` field carried by the RDF response must rescue + // the combo label so it never falls through to the raw UUID. + const result = buildHierarchyGraphs({ + terms, + relations, + relationSettings: null, + relationColors: RELATION_COLORS, + glossaryNames: {}, // caller cannot resolve gloss-id + }); + + expect(result.combos).toHaveLength(1); + expect(result.combos[0].label).toBe('Pharmaceuticals'); + expect(result.combos[0].label).not.toBe('gloss-id'); + }); + + it('falls through to the raw glossaryId only when both lookups fail', () => { + const termsNoGroup = [ + term('parent', 'gloss-id'), + term('child', 'gloss-id'), + ]; + + const result = buildHierarchyGraphs({ + terms: termsNoGroup, + relations, + relationSettings: null, + relationColors: RELATION_COLORS, + glossaryNames: {}, + }); + + expect(result.combos).toHaveLength(1); + expect(result.combos[0].label).toBe('gloss-id'); + }); + + it('ignores blank `group` strings when picking the fallback', () => { + const blankGroupTerms = [ + term('parent', 'gloss-id', ''), + term('child', 'gloss-id', 'Pharmaceuticals'), + ]; + + const result = buildHierarchyGraphs({ + terms: blankGroupTerms, + relations, + relationSettings: null, + relationColors: RELATION_COLORS, + glossaryNames: {}, + }); + + expect(result.combos[0].label).toBe('Pharmaceuticals'); + }); +});