Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
8bad8f4
Phase 0: EmailArchive nested-document metadata schema (#3660)
luis100 Apr 30, 2026
49e20a5
feat(search): Phase 1 — Advanced AIP Search nested filter groups
luis100 Apr 30, 2026
c27e938
feat(emailarchive): add title/level to ingest XSLT and HTML dissemina…
luis100 Apr 30, 2026
9e23d97
fix(emailarchive): use composite title in ingest XSLT
luis100 Apr 30, 2026
bc4447f
fix(emailarchive): add state=ACTIVE to nested email child documents
luis100 Apr 30, 2026
d3f0c14
Adding test for nested documents indexing and search
luis100 Apr 30, 2026
cad1303
fix(search): add missing appendANDOperator in block-join filter builders
luis100 May 4, 2026
4b016ba
fix(i18n): add missing GWT plural forms across locale files
luis100 May 4, 2026
3d1cbe9
fix(search): remove space in block-join queries to fix q.op=AND inter…
luis100 May 4, 2026
927985b
fix(search): fix block-join query format for q.op=AND compatibility
luis100 May 4, 2026
1fad674
fix(emailarchive): index dateStart/dateEnd as standard dateInitial/da…
luis100 May 4, 2026
925ca69
revert: remove AllFilterParameter suppression from parseFilter block-…
luis100 May 4, 2026
8922ae5
fix(search): handle wildcard values in block-join child sub-query bui…
luis100 May 4, 2026
71c5314
fix(search): fix email sent date filter format and selector type
luis100 May 4, 2026
d1516d0
Update Spanish translations
luis100 May 5, 2026
9a8d90c
fix(search): exclude nested child documents from normal AIP queries
luis100 May 6, 2026
7ceac86
feat(catalogue): add virtual catalogue for email archives and individ…
luis100 May 6, 2026
51c85d0
fix(catalogue): fix email column rendering and enable advanced search…
luis100 May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ roda/

1. **Java 21** (Oracle JDK) — strictly required for compilation
2. **Maven 3.8.6+** — build tool
3. **Docker & Docker Compose** — for running Solr, PostgreSQL, LDAP, etc.
3. **Docker** — required for running the application locally and for tests (via Testcontainers — no manual `docker compose` needed for tests)
4. **GitHub account with PAT** — required for GitHub Packages dependency resolution

**Configure Maven for GitHub Packages** (`~/.m2/settings.xml`):
Expand Down Expand Up @@ -112,6 +112,10 @@ The PAT must have `read:packages` permission. Without this, the build will fail

### Starting Development Dependencies

**For running tests:** No manual setup needed — tests use **Testcontainers** (`TestContainersManager`) which automatically starts ZooKeeper, Solr, PostgreSQL, Mailpit, ClamAV, and Siegfried as ephemeral Docker containers. The `RodaContainersLifecycleListener` TestNG suite listener wires this up before any test class loads. Docker must be running on the host, but no `docker compose` command is required.

**For running the application locally:**

```bash
# Create required data directories
mkdir -p $HOME/.roda/data/{storage,staging-storage}
Expand All @@ -120,7 +124,7 @@ mkdir -p $HOME/.roda/data/{storage,staging-storage}
docker compose -f deploys/standalone/docker-compose-dev.yaml up -d
```

Services and ports:
Services and ports (for local app, not tests):
- ZooKeeper: `2181`
- Apache Solr: `8983`
- PostgreSQL: `5432`
Expand Down Expand Up @@ -196,25 +200,23 @@ mvn -f dev/codeserver gwt:codeserver -DrodaPath=$(pwd)

### Running Tests

Tests use **Testcontainers** — no environment variables or `docker compose` setup required. Docker must be available on the host.

```bash
# All tests (requires Docker services running)
mvn clean test
# All tests
mvn clean test -Pcore

# CI subset only (faster)
mvn -Dtestng.groups="travis" -Denforcer.skip=true clean org.jacoco:jacoco-maven-plugin:prepare-agent test
mvn -Dtestng.groups="travis" -Denforcer.skip=true clean org.jacoco:jacoco-maven-plugin:prepare-agent test -Pcore

# Specific test class
mvn -pl roda-core/roda-core-tests -am test -Dtest=NestedDocumentSearchTest -Dtestng.groups=dev -Denforcer.skip=true -DfailIfNoTests=false

# Skip tests
mvn clean package -Dmaven.test.skip=true
```

### Required Environment Variables (for tests matching CI)

```
RODA_CORE_SOLR_TYPE=CLOUD
SIEGFRIED_MODE=standalone
```

See `.github/workflows/CI.yml` for the full CI test environment configuration.
See `.github/workflows/CI.yml` for the full CI configuration.

### Key Test Classes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ public enum DateGranularity {

public static final String UI_LISTS_PAGE_SIZE_INITIAL = "pageSize.initial";
public static final String UI_LISTS_PAGE_SIZE_INCREMENT = "pageSize.increment";
public static final String UI_LISTS_INCLUDE_NESTED_DOCUMENTS = "includeNestedDocuments";

public static final String UI_CATALOGUE_VIRTUAL_PROPERTY = "ui.catalogue.virtual";
public static final String UI_LISTS_CATALOGUE_LABEL_I18N_PROPERTY = "catalogue.label.i18n";
public static final String UI_LISTS_CATALOGUE_ICON_PROPERTY = "catalogue.icon";
public static final String UI_LISTS_CATALOGUE_FILTER = "catalogue.filter";
public static final String UI_LISTS_CATALOGUE_CHILDOF_FILTER = "catalogue.childOf.filter";
public static final String UI_LISTS_CATALOGUE_CLICK_ACTION = "catalogue.click_action";
public static final String UI_LISTS_CATALOGUE_CLICK_ACTION_BROWSE_PARENT = "browse_parent";

public static final String UI_ICONS_CLASS = "ui.icons.class";
public static final String UI_SERVICE_DROPFOLDER_URL = "ui.service.dropfolder.url";
Expand Down Expand Up @@ -1980,6 +1989,8 @@ public String toString() {
public static final String SEARCH_FIELD_TYPE_SUGGEST_FIELD = "suggestField";
public static final String SEARCH_FIELD_TYPE_SUGGEST_PARTIAL = "suggestPartial";
public static final String SEARCH_FIELD_TYPE_CONTROLLED = "controlled";
public static final String SEARCH_FIELD_NESTED_TYPE = "nestedType";
public static final String SEARCH_FIELD_NESTED_PARENT_TYPE = "nestedParentType";
public static final String SEARCH_WITH_PREFILTER_HANDLER = "$prefilter";
public static final String SEARCH_WITH_SAVED_HANDLER = "$savedSearch";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ public class FindRequest extends CountRequest {
private Long childrenLimit;
@JsonProperty("childrenFilter")
private Filter childrenFilter;
@JsonProperty("includeNestedDocuments")
private boolean includeNestedDocuments;

// Private constructor for Jackson deserialization
private FindRequest(FindRequestBuilder builder) {
Expand All @@ -76,6 +78,7 @@ private FindRequest(FindRequestBuilder builder) {
this.childrenFieldsToReturn = builder.childrenFieldsToReturn;
this.childrenLimit = builder.childrenLimit;
this.childrenFilter = builder.childrenFilter;
this.includeNestedDocuments = builder.includeNestedDocuments;
}

public Sorter getSorter() {
Expand Down Expand Up @@ -122,6 +125,10 @@ public Filter getChildrenFilter() {
return childrenFilter;
}

/**
 * Accessor for the {@code includeNestedDocuments} flag carried by this
 * request.
 *
 * @return the current value of the {@code includeNestedDocuments} field
 */
public boolean isIncludeNestedDocuments() {
  return this.includeNestedDocuments;
}

/**
 * Static entry point for building a request: creates a fresh
 * {@link FindRequestBuilder} seeded with the given filter and
 * {@code onlyActive} flag.
 *
 * @param filter
 *          the filter handed to the builder's constructor
 * @param onlyActive
 *          the {@code onlyActive} flag handed to the builder's constructor
 * @return a newly created builder
 */
public static FindRequestBuilder getBuilder(final Filter filter, boolean onlyActive) {
  final FindRequestBuilder builder = new FindRequestBuilder(filter, onlyActive);
  return builder;
}
Expand All @@ -141,6 +148,7 @@ public static class FindRequestBuilder {
private List<String> childrenFieldsToReturn;
private Long childrenLimit;
private Filter childrenFilter;
private boolean includeNestedDocuments;

public FindRequestBuilder(@JsonProperty("filter") final Filter filter,
@JsonProperty("onlyActive") boolean onlyActive) {
Expand All @@ -155,6 +163,7 @@ public FindRequestBuilder(@JsonProperty("filter") final Filter filter,
this.fieldsToReturn = Collections.emptyList();
this.collapse = null;
this.children = false;
this.includeNestedDocuments = false;
}

public FindRequest build() {
Expand Down Expand Up @@ -215,5 +224,10 @@ public FindRequestBuilder withChildrenFilter(Filter childrenFilter) {
this.childrenFilter = childrenFilter;
return this;
}

/**
 * Sets the {@code includeNestedDocuments} flag on this builder.
 *
 * @param includeNestedDocuments
 *          the value to store in the builder
 * @return this builder, so calls can be chained
 */
public FindRequestBuilder withIncludeNestedDocuments(boolean includeNestedDocuments) {
this.includeNestedDocuments = includeNestedDocuments;
return this;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ public final class CorporaConstants {

public static final String SOURCE_DESC_METADATA_CONTAINER = "DescriptiveMetadata";
public static final String STRANGE_DESC_METADATA_FILE = "strange.xml";
public static final String EMAIL_ARCHIVE_FULL_FILE = "emailarchive_full.xml";
public static final String EMAIL_ARCHIVE_MINIMAL_FILE = "emailarchive_minimal.xml";
public static final String EMAIL_ARCHIVE_NO_EMAILS_FILE = "emailarchive_no_emails.xml";
public static final String EMAIL_ARCHIVE_METADATA_TYPE = "emailarchive";

public static final String TEXT_XML = "text/xml";
public static final String REPRESENTATION_1_PREMIS_EVENT_ID = "urn:roda:premis:event:roda_398";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE file at the root of the source
* tree and available online at
*
* https://github.com/keeps/roda
*/
package org.roda.core.index;

import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;

import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.roda.core.CorporaConstants;
import org.roda.core.RodaCoreFactory;
import org.roda.core.data.common.RodaConstants;
import org.roda.core.data.exceptions.GenericException;
import org.roda.core.data.exceptions.NotFoundException;
import org.roda.core.data.exceptions.RODAException;
import org.roda.core.index.utils.SolrUtils;
import org.roda.core.storage.Binary;
import org.roda.core.storage.DefaultStoragePath;
import org.roda.core.storage.StorageService;
import org.roda.core.storage.fs.FileStorageService;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

/**
 * Tests the result of {@link SolrUtils#getDescriptiveMetadataFields} when it
 * is invoked with the {@code emailarchive} metadata type on three fixture
 * files stored in the test corpora: a full archive (three emails), a minimal
 * archive (one email, required fields only) and an archive without emails.
 *
 * <p>
 * Each test loads a fixture, runs the crosswalk and inspects the resulting
 * {@link SolrInputDocument}: the parent-level fields and the child documents
 * found under the {@code emails} field.
 * </p>
 */
@Test(groups = {RodaConstants.TEST_GROUP_ALL, RodaConstants.TEST_GROUP_DEV, RodaConstants.TEST_GROUP_TRAVIS})
public class EmailArchiveCrosswalkTest {

  /** Name of the parent-document field under which child email documents are looked up. */
  private static final String EMAILS_FIELD = "emails";

  /** Read-only storage service rooted at the {@code /corpora} test resources. */
  private static StorageService corporaService;

  /**
   * Locates the {@code /corpora} resource directory on the classpath and opens
   * it as a file storage service shared by every test in this class.
   *
   * @throws URISyntaxException
   *           if the resource URL cannot be converted to a URI
   * @throws GenericException
   *           if the storage service cannot be created
   */
  @BeforeClass
  public static void setUp() throws URISyntaxException, GenericException {
    // Resolve the resource through this class rather than an unrelated test
    // class, so the test does not depend on another test's existence.
    URL corporaURL = EmailArchiveCrosswalkTest.class.getResource("/corpora");
    assertNotNull("Test resource '/corpora' must be available on the classpath", corporaURL);
    Path corporaPath = Paths.get(corporaURL.toURI());
    corporaService = new FileStorageService(corporaPath);
  }

  /** Instantiates the RODA core test environment before each test method. */
  @BeforeMethod
  public void init() {
    RodaCoreFactory.instantiateTest(false, false, false, false, false, false, false);
  }

  /** Shuts the RODA core down after each test method. */
  @AfterMethod
  public void cleanup() throws NotFoundException, GenericException, IOException {
    RodaCoreFactory.shutdown();
  }

  // ---------------------------------------------------------------------------
  // Full fixture — 3 emails
  // ---------------------------------------------------------------------------

  @Test
  public void testFullCrosswalkProducesParentFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_FULL_FILE);

    assertNotNull(doc);
    assertFieldValue(doc, "custodian_txt", "João Silva");
    assertFieldValue(doc, "emailAddress_s", "joao.silva@empresa.pt");
    assertFieldValue(doc, "totalMessages_i", "3");
    assertFieldValue(doc, "originalFormat_s", "PST");
    assertFieldValue(doc, "archivingMotive_txt", "Offboarding");
    assertFieldValue(doc, "content_type", "emailarchive");
  }

  @Test
  public void testFullCrosswalkProducesDateFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_FULL_FILE);

    assertNotNull(doc);
    assertFieldValue(doc, "dateStart_dt", "2020-01-01T00:00:00Z");
    assertFieldValue(doc, "dateEnd_dt", "2023-12-31T00:00:00Z");
  }

  @Test
  public void testFullCrosswalkProducesThreeChildDocuments() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_FULL_FILE);

    assertNotNull(doc);
    SolrInputField emailsField = doc.getField(EMAILS_FIELD);
    assertNotNull("'emails' field must be present for nested children", emailsField);

    Collection<SolrInputDocument> children = getChildDocuments(emailsField);
    assertEquals("Expected 3 child email documents", 3, children.size());
  }

  @Test
  public void testFullCrosswalkFirstChildFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_FULL_FILE);
    SolrInputDocument first = getChildAt(doc, 0);

    assertFieldValue(first, "content_type", "email");
    assertFieldValue(first, "messageId_s", "<msg001@empresa.pt>");
    assertFieldValue(first, "subject_txt", "Quarterly Report Q1 2021");
    assertFieldValue(first, "sender_s", "joao.silva@empresa.pt");
    assertFieldValue(first, "sentDate_dt", "2021-03-15T09:42:00Z");
    assertFieldValue(first, "folderPath_s", "Inbox/Projects");
    assertFieldValue(first, "hasAttachments_b", "true");
    assertFieldValue(first, "filePath_s", "Inbox/Projects/msg_001.eml");
  }

  @Test
  public void testFullCrosswalkMultipleRecipients() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_FULL_FILE);
    SolrInputDocument first = getChildAt(doc, 0);

    // First email has two recipients: ana.costa and rui.pinto
    SolrInputField recipientsField = first.getField("recipients_txt");
    assertNotNull("recipients_txt field must be present", recipientsField);
    Collection<?> values = recipientsField.getValues();
    assertNotNull(values);
    assertEquals("Expected 2 recipient values", 2, values.size());
  }

  @Test
  public void testFullCrosswalkThirdChildFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_FULL_FILE);
    SolrInputDocument third = getChildAt(doc, 2);

    assertFieldValue(third, "subject_txt", "Budget Approval Request");
    assertFieldValue(third, "folderPath_s", "Sent");
    assertFieldValue(third, "filePath_s", "Sent/msg_003.eml");
  }

  // ---------------------------------------------------------------------------
  // Minimal fixture — 1 email, only required fields
  // ---------------------------------------------------------------------------

  @Test
  public void testMinimalCrosswalkProducesRequiredFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_MINIMAL_FILE);

    assertNotNull(doc);
    assertFieldValue(doc, "custodian_txt", "Jane Doe");
    assertFieldValue(doc, "emailAddress_s", "jane.doe@example.org");
    assertFieldValue(doc, "content_type", "emailarchive");
  }

  @Test
  public void testMinimalCrosswalkOmitsAbsentOptionalFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_MINIMAL_FILE);

    assertNotNull(doc);
    assertNull("dateStart_dt should be absent when not in source", doc.getField("dateStart_dt"));
    assertNull("dateEnd_dt should be absent when not in source", doc.getField("dateEnd_dt"));
    assertNull("totalMessages_i should be absent when not in source", doc.getField("totalMessages_i"));
    assertNull("originalFormat_s should be absent when not in source", doc.getField("originalFormat_s"));
    assertNull("archivingMotive_txt should be absent when not in source", doc.getField("archivingMotive_txt"));
  }

  @Test
  public void testMinimalCrosswalkProducesOneChild() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_MINIMAL_FILE);

    // Fail with an assertion instead of a NullPointerException if the
    // crosswalk produced no document.
    assertNotNull(doc);
    SolrInputField emailsField = doc.getField(EMAILS_FIELD);
    assertNotNull(emailsField);
    assertEquals(1, getChildDocuments(emailsField).size());
  }

  @Test
  public void testMinimalCrosswalkChildHasRequiredFields() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_MINIMAL_FILE);
    SolrInputDocument child = getChildAt(doc, 0);

    assertFieldValue(child, "content_type", "email");
    assertFieldValue(child, "messageId_s", "<only-email@example.org>");
    assertFieldValue(child, "subject_txt", "Hello World");
    assertFieldValue(child, "hasAttachments_b", "false");
  }

  // ---------------------------------------------------------------------------
  // No-emails fixture — mailbox with zero email records
  // ---------------------------------------------------------------------------

  @Test
  public void testNoEmailsCrosswalkProducesParentFieldsOnly() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_NO_EMAILS_FILE);

    assertNotNull(doc);
    assertFieldValue(doc, "custodian_txt", "Empty Mailbox User");
    assertFieldValue(doc, "content_type", "emailarchive");
    assertFieldValue(doc, "totalMessages_i", "0");
  }

  @Test
  public void testNoEmailsCrosswalkProducesNoChildDocumentsField() throws RODAException {
    SolrInputDocument doc = getCrosswalkResult(CorporaConstants.EMAIL_ARCHIVE_NO_EMAILS_FILE);

    assertNotNull(doc);
    assertNull("'emails' field must be absent when there are no child emails", doc.getField(EMAILS_FIELD));
  }

  // ---------------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------------

  /**
   * Loads the named fixture from the descriptive-metadata corpora container and
   * runs the {@code emailarchive} crosswalk on it.
   *
   * <p>
   * Any exception raised while loading or transforming the fixture fails the
   * current test; the original exception is attached as the cause so its stack
   * trace shows up in the test report.
   * </p>
   *
   * @param filename
   *          fixture file name inside the descriptive-metadata container
   * @return the document returned by the crosswalk
   */
  private SolrInputDocument getCrosswalkResult(String filename) throws RODAException {
    try {
      DefaultStoragePath path = DefaultStoragePath.parse(CorporaConstants.SOURCE_DESC_METADATA_CONTAINER, filename);
      Binary binary = corporaService.getBinary(path);
      return SolrUtils.getDescriptiveMetadataFields(binary, CorporaConstants.EMAIL_ARCHIVE_METADATA_TYPE, null);
    } catch (Exception e) {
      // Pass the exception along as the failure cause instead of keeping only
      // its message.
      Assert.fail("Unexpected exception loading fixture '" + filename + "': " + e.getMessage(), e);
      // Unreachable: Assert.fail always throws, but the compiler needs a return.
      return null;
    }
  }

  /**
   * Asserts that {@code doc} has a field named {@code fieldName} whose value,
   * rendered with {@code toString()}, equals {@code expectedValue}.
   */
  private void assertFieldValue(SolrInputDocument doc, String fieldName, String expectedValue) {
    SolrInputField field = doc.getField(fieldName);
    assertNotNull("Field '" + fieldName + "' must be present", field);
    Object actual = field.getValue();
    assertNotNull("Field '" + fieldName + "' must have a value", actual);
    assertEquals("Field '" + fieldName + "' value mismatch", expectedValue, actual.toString());
  }

  /**
   * Returns the child documents stored in the given {@code emails} field.
   *
   * <p>
   * {@link SolrInputField#getValues()} is used rather than {@code getValue()}
   * so that a field holding a single, un-wrapped child document is still
   * returned as a one-element collection instead of failing the cast.
   * </p>
   */
  @SuppressWarnings("unchecked")
  private Collection<SolrInputDocument> getChildDocuments(SolrInputField emailsField) {
    Collection<?> values = emailsField.getValues();
    assertNotNull("'emails' field value must not be null", values);
    // Unchecked: the tests treat every value of this field as a child SolrInputDocument.
    return (Collection<SolrInputDocument>) values;
  }

  /**
   * Returns the child document at position {@code index} (zero-based, in
   * iteration order) under the parent's {@code emails} field, failing the test
   * if the parent, the field, or the requested child is missing.
   */
  private SolrInputDocument getChildAt(SolrInputDocument parent, int index) {
    assertNotNull("Parent document must not be null", parent);
    SolrInputField emailsField = parent.getField(EMAILS_FIELD);
    assertNotNull(emailsField);
    Collection<SolrInputDocument> children = getChildDocuments(emailsField);
    return children.stream().skip(index).findFirst()
      .orElseThrow(() -> new AssertionError("No child document at index " + index));
  }
}
Loading
Loading