Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
import jakarta.inject.Inject;

import org.apache.james.backends.cassandra.utils.CassandraAsyncExecutor;
import org.apache.james.blob.api.BlobId;
import org.apache.james.blob.api.BucketName;
import org.apache.james.blob.api.PlainBlobId;
import org.apache.james.core.Username;
import org.apache.james.mailbox.model.MessageId;

Expand All @@ -47,15 +47,13 @@ public class StorageInformationDAO {
private final PreparedStatement addStatement;
private final PreparedStatement removeStatement;
private final PreparedStatement readStatement;
private final BlobId.Factory blobIdFactory;

@Inject
StorageInformationDAO(CqlSession session, BlobId.Factory blobIdFactory) {
StorageInformationDAO(CqlSession session) {
this.cassandraAsyncExecutor = new CassandraAsyncExecutor(session);
this.addStatement = prepareAdd(session);
this.removeStatement = prepareRemove(session);
this.readStatement = prepareRead(session);
this.blobIdFactory = blobIdFactory;
}

private PreparedStatement prepareRead(CqlSession session) {
Expand Down Expand Up @@ -102,6 +100,6 @@ Mono<StorageInformation> retrieveStorageInformation(Username username, MessageId
.setString(MESSAGE_ID, messageId.serialize()))
.map(row -> StorageInformation.builder()
.bucketName(BucketName.of(row.getString(BUCKET_NAME)))
.blobId(blobIdFactory.parse(row.getString(BLOB_ID))));
.blobId(new PlainBlobId(row.getString(BLOB_ID))));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

import org.apache.james.backends.cassandra.CassandraCluster;
import org.apache.james.backends.cassandra.CassandraClusterExtension;
import org.apache.james.blob.api.PlainBlobId;
import org.apache.james.mailbox.inmemory.InMemoryId;
import org.apache.james.mailbox.inmemory.InMemoryMessageId;
import org.apache.james.vault.dto.DeletedMessageWithStorageInformationConverter;
Expand All @@ -56,12 +55,11 @@ public class CassandraDeletedMessageMetadataVaultTest implements DeletedMessageM

@BeforeEach
void setUp(CassandraCluster cassandra) {
PlainBlobId.Factory blobIdFactory = new PlainBlobId.Factory();
InMemoryMessageId.Factory messageIdFactory = new InMemoryMessageId.Factory();
DeletedMessageWithStorageInformationConverter dtoConverter = new DeletedMessageWithStorageInformationConverter(blobIdFactory, messageIdFactory, new InMemoryId.Factory());
DeletedMessageWithStorageInformationConverter dtoConverter = new DeletedMessageWithStorageInformationConverter(messageIdFactory, new InMemoryId.Factory());

metadataDAO = new MetadataDAO(cassandra.getConf(), messageIdFactory, new MetadataSerializer(dtoConverter));
storageInformationDAO = new StorageInformationDAO(cassandra.getConf(), blobIdFactory);
storageInformationDAO = new StorageInformationDAO(cassandra.getConf());
userPerBucketDAO = new UserPerBucketDAO(cassandra.getConf());

testee = new CassandraDeletedMessageMetadataVault(metadataDAO, storageInformationDAO, userPerBucketDAO);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import org.apache.james.backends.cassandra.CassandraCluster;
import org.apache.james.backends.cassandra.CassandraClusterExtension;
import org.apache.james.blob.api.PlainBlobId;
import org.apache.james.mailbox.inmemory.InMemoryId;
import org.apache.james.mailbox.inmemory.InMemoryMessageId;
import org.apache.james.mailbox.model.MessageId;
Expand All @@ -49,7 +48,8 @@ class MetadataDAOTest {
@BeforeEach
void setUp(CassandraCluster cassandra) {
DeletedMessageWithStorageInformationConverter dtoConverter = new DeletedMessageWithStorageInformationConverter(
new PlainBlobId.Factory(), new InMemoryMessageId.Factory(), new InMemoryId.Factory());
new InMemoryMessageId.Factory(),
new InMemoryId.Factory());

testee = new MetadataDAO(cassandra.getConf(), new InMemoryMessageId.Factory(),
new MetadataSerializer(dtoConverter));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
class StorageInformationDAOTest {
private static final BucketName BUCKET_NAME = BucketName.of("deletedMessages-2019-06-01");
private static final BucketName BUCKET_NAME_2 = BucketName.of("deletedMessages-2019-07-01");
private static final PlainBlobId.Factory BLOB_ID_FACTORY = new PlainBlobId.Factory();
private static final Username OWNER = Username.of("owner");
private static final TestMessageId MESSAGE_ID = TestMessageId.of(36);
private static final BlobId BLOB_ID = new PlainBlobId.Factory().parse("05dcb33b-8382-4744-923a-bc593ad84d23");
Expand All @@ -54,7 +53,7 @@ class StorageInformationDAOTest {

@BeforeEach
void setUp(CassandraCluster cassandra) {
testee = new StorageInformationDAO(cassandra.getConf(), BLOB_ID_FACTORY);
testee = new StorageInformationDAO(cassandra.getConf());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
import jakarta.inject.Inject;

import org.apache.james.backends.postgres.utils.PostgresExecutor;
import org.apache.james.blob.api.BlobId;
import org.apache.james.blob.api.BucketName;
import org.apache.james.blob.api.PlainBlobId;
import org.apache.james.core.Username;
import org.apache.james.mailbox.model.MessageId;
import org.jooq.Record;
Expand All @@ -46,15 +46,12 @@
public class PostgresDeletedMessageMetadataVault implements DeletedMessageMetadataVault {
private final PostgresExecutor postgresExecutor;
private final MetadataSerializer metadataSerializer;
private final BlobId.Factory blobIdFactory;

@Inject
public PostgresDeletedMessageMetadataVault(PostgresExecutor postgresExecutor,
MetadataSerializer metadataSerializer,
BlobId.Factory blobIdFactory) {
MetadataSerializer metadataSerializer) {
this.postgresExecutor = postgresExecutor;
this.metadataSerializer = metadataSerializer;
this.blobIdFactory = blobIdFactory;
}

@Override
Expand Down Expand Up @@ -93,7 +90,7 @@ public Publisher<StorageInformation> retrieveStorageInformation(Username usernam
private Function<Record, StorageInformation> toStorageInformation() {
return record -> StorageInformation.builder()
.bucketName(BucketName.of(record.get(BUCKET_NAME)))
.blobId(blobIdFactory.parse(record.get(BLOB_ID)));
.blobId(new PlainBlobId(record.get(BLOB_ID)));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

import org.apache.james.backends.postgres.PostgresDataDefinition;
import org.apache.james.backends.postgres.PostgresExtension;
import org.apache.james.blob.api.BlobId;
import org.apache.james.blob.api.PlainBlobId;
import org.apache.james.mailbox.inmemory.InMemoryId;
import org.apache.james.mailbox.inmemory.InMemoryMessageId;
import org.apache.james.vault.dto.DeletedMessageWithStorageInformationConverter;
Expand All @@ -35,13 +33,11 @@ class PostgresDeletedMessageMetadataVaultTest implements DeletedMessageMetadataV

@Override
public DeletedMessageMetadataVault metadataVault() {
BlobId.Factory blobIdFactory = new PlainBlobId.Factory();
InMemoryMessageId.Factory messageIdFactory = new InMemoryMessageId.Factory();
DeletedMessageWithStorageInformationConverter dtoConverter = new DeletedMessageWithStorageInformationConverter(blobIdFactory,
DeletedMessageWithStorageInformationConverter dtoConverter = new DeletedMessageWithStorageInformationConverter(
messageIdFactory, new InMemoryId.Factory());

return new PostgresDeletedMessageMetadataVault(postgresExtension.getDefaultPostgresExecutor(),
new MetadataSerializer(dtoConverter),
blobIdFactory);
new MetadataSerializer(dtoConverter));
}
}
13 changes: 13 additions & 0 deletions mailbox/plugin/deleted-messages-vault/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
<artifactId>blob-memory</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${james.groupId}</groupId>
<artifactId>blob-storage-strategy</artifactId>
</dependency>

<dependency>
<groupId>${james.groupId}</groupId>
Expand All @@ -96,6 +100,15 @@
<groupId>${james.groupId}</groupId>
<artifactId>james-server-core</artifactId>
</dependency>
<dependency>
<groupId>${james.groupId}</groupId>
<artifactId>james-server-data-api</artifactId>
</dependency>
<dependency>
<groupId>${james.groupId}</groupId>
<artifactId>james-server-data-memory</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${james.groupId}</groupId>
<artifactId>james-server-task-json</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,38 @@
import com.google.common.base.Preconditions;

public class VaultConfiguration {
public static final String DEFAULT_SINGLE_BUCKET_NAME = "james-deleted-message-vault";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this allow nesting below root ?
Imagine I can or want to have a single s3 bucket for all james related stuff. My bucket is free-hosting-foo-23402
how do I configure the Vault so that deleted messages are nested under
free-hosting-foo-23402/james-deleted-message-vault/xxxx
so I can have the mailqueue and other s3 backed store write in other subdirectories ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No the bucket is (by default) james-deleted-message-vault
Here it is not a subfolder of the main S3 bucket.
The goal of this work is not to get a single bucket
But operate distributed james server off 3 buckets (default, james-uploads, james-deleted-message-vault)
(So as an Ops I need to create 3 buckets and not one each month)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using the term Bucket or bucket name for the blobstore was a mistake. it overlaps in a bad way with the s3 notions.

Here you are manipulating a blobstore bucket.
I think this can be stored as a subpath of a single underlying S3 bucket by using the prefix property on the S3 configuration
This is all very confusing (to me at least) as this prefix only allows to shift the whole namespace in which case it is impossible to split storage of some use cases among different bucket

On a more direct musing I think the previous design influences the naming too much here, the term single will become confusing quite fast. I think I would use DEFAULT_VAULT_BUCKET instead

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using the term Bucket or bucket name for the blobstore was a mistake.

+1

But as said CF previous refactoring that may not be something easy that we are willing to change.

I think this can be stored as a subpath of a single underlying S3 bucket

I don't want this to mess up with mailbox GC. I'd rather have a separate blob store bucket if given the choice.

Also a separate blob store bucket makes loads of sense as the storage constraints are not the standard james ones (glacier?)

I think I would use DEFAULT_VAULT_BUCKET instead

Cf comment above.

Copy link
Contributor Author

@Arsnael Arsnael Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we agreed that this work for now just allows going from multiple S3 buckets to just one for the vault only, but it does not put all blobs under the root bucket.

The goal in the end would be to put all blobs under one S3 bucket yes. But that's not what this PR intends to do. That would require more changes and discussions.

Is it still unclear on that point @jeantil ?

Copy link
Contributor

@jeantil jeantil Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it still unclear on that point @jeantil ?

no that's perfectly clear

The goal in the end would be to put all blobs under one S3 bucket yes.

This can be achieved today using the prefix or namespace configuration of blob.properties (sorry I don't exactly remember which I had to use, this is not so well documented) but is not really my point.

I think the constant should not contain the word SINGLE as this only makes sense in the current refactoring context. if you were building this feature from scratch today based on the new design you would not call it single.

As I understand it this is the Blobstore bucket name (so a logical bucket name) under which all the blobs of the feature implemented by the plugin will be written. The actual S3 bucket name in which this logical bucket is stored can be different based on the blob.properties configuration ( prefix and namespace will affect the actual storage namespace)

Hence my proposal to call it DEFAULT_VAULT_BUCKET (by no means definitive)

I don't want this to mess up with mailbox GC. I'd rather have a separate blob store bucket if given the choice.

I'm not sure how my proposed name change, or using the prefix/namespace features of blob.properties, would affect the mailbox GC ... I propose we discuss this with actual examples on the ADR. This goes beyond my intention for the PR.

public static final VaultConfiguration DEFAULT =
new VaultConfiguration(false, ChronoUnit.YEARS.getDuration(), DefaultMailboxes.RESTORED_MESSAGES);
new VaultConfiguration(false, ChronoUnit.YEARS.getDuration(), DefaultMailboxes.RESTORED_MESSAGES, DEFAULT_SINGLE_BUCKET_NAME);
public static final VaultConfiguration ENABLED_DEFAULT =
new VaultConfiguration(true, ChronoUnit.YEARS.getDuration(), DefaultMailboxes.RESTORED_MESSAGES);
new VaultConfiguration(true, ChronoUnit.YEARS.getDuration(), DefaultMailboxes.RESTORED_MESSAGES, DEFAULT_SINGLE_BUCKET_NAME);

public static VaultConfiguration from(Configuration propertiesConfiguration) {
Duration retentionPeriod = Optional.ofNullable(propertiesConfiguration.getString("retentionPeriod"))
.map(string -> DurationParser.parse(string, ChronoUnit.DAYS))
.orElse(DEFAULT.getRetentionPeriod());
String restoreLocation = Optional.ofNullable(propertiesConfiguration.getString("restoreLocation"))
.orElse(DEFAULT.getRestoreLocation());
String singleBucketName = Optional.ofNullable(propertiesConfiguration.getString("singleBucketName"))
.orElse(DEFAULT.getSingleBucketName());
boolean enabled = propertiesConfiguration.getBoolean("enabled", false);
return new VaultConfiguration(enabled, retentionPeriod, restoreLocation);
return new VaultConfiguration(enabled, retentionPeriod, restoreLocation, singleBucketName);
}

private final boolean enabled;
private final Duration retentionPeriod;
private final String restoreLocation;
private final String singleBucketName;

VaultConfiguration(boolean enabled, Duration retentionPeriod, String restoreLocation) {
VaultConfiguration(boolean enabled, Duration retentionPeriod, String restoreLocation, String singleBucketName) {
this.enabled = enabled;
Preconditions.checkNotNull(retentionPeriod);
Preconditions.checkNotNull(restoreLocation);
Preconditions.checkNotNull(singleBucketName);

this.retentionPeriod = retentionPeriod;
this.restoreLocation = restoreLocation;
this.singleBucketName = singleBucketName;
}

public boolean isEnabled() {
Expand All @@ -71,20 +77,25 @@ public String getRestoreLocation() {
return restoreLocation;
}

/**
 * Returns the logical blob store bucket name used by the deleted-message vault
 * when operating in single-bucket mode (configured via the {@code singleBucketName}
 * property, defaulting to {@code DEFAULT_SINGLE_BUCKET_NAME}).
 */
public String getSingleBucketName() {
return singleBucketName;
}

@Override
public final boolean equals(Object o) {
if (o instanceof VaultConfiguration) {
VaultConfiguration that = (VaultConfiguration) o;

return Objects.equals(this.retentionPeriod, that.retentionPeriod)
&& Objects.equals(this.restoreLocation, that.restoreLocation)
&& Objects.equals(this.enabled, that.enabled);
&& Objects.equals(this.enabled, that.enabled)
&& Objects.equals(this.singleBucketName, that.singleBucketName);
}
return false;
}

@Override
public final int hashCode() {
return Objects.hash(retentionPeriod, restoreLocation, enabled);
return Objects.hash(retentionPeriod, restoreLocation, enabled, singleBucketName);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/

package org.apache.james.vault.blob;

import java.time.Clock;
import java.time.ZonedDateTime;
import java.util.UUID;

import org.apache.commons.lang3.RandomStringUtils;
import org.apache.james.blob.api.BlobId;
import org.apache.james.blob.api.PlainBlobId;

public class BlobIdTimeGenerator {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why the whole PR goes around the BlobId.Factory design 🤷

Copy link
Contributor

@chibenwa chibenwa Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please re read the interface.

BlobId.Factory is about *parsing* BlobId

This work is about *generating* them.

Maybe I am missing something, despite investing several hours on the topic. Maybe a hand on example could help?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I completely miss the point of BlobId.Factory.

My understanding is that the Factory is responsible for creating instances of BlobId that point to a specific object (generating BlobIds), either by wrapping/decorating an id or by parsing a storage path and extracting the logical id.

In this case you are encoding custom paths as a PlainBlobId bypassing the factory which is not consistent with how things are done in the rest of the codebase.

I was kinda expecting a TimePrefixedBlobIdFactory (don't focus on this specific name) which would encapsulate the path manipulation. I may misunderstand, but your path manipulation appears to be very similar to what is done in MailRepositoryBlobIdFactory, though for a different use case.

Copy link
Contributor

@chibenwa chibenwa Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello @jeantil

My understanding is that the Factory is responsible for creating instances of BlobId that point to a specific object (generate BlobIds)

public interface BlobId {

    interface Factory {
        BlobId of(String id);

        BlobId parse(String id);
    }

    String asString();
}

I see no generate method.

Generation is an actual responsibility of the BlobStore, or of its caller.

Proof:

At the minimum a concrete clarification would be a s/of/wrap/g or s/of/decorate/g - if this sounds relevant to others I agree with opening such a PR.

Please note that in the example taken: BlobMailRepository:

      val blobId = blobIdFactory.of(mailKey.asString())

The decision to rely on a MailKey as the storage identifier is not encoded in the BlobId.Factory either.

I was kinda expecting a TimePrefixedBlobIdFactory

TBH, as we are not actively using the date information encoded in the object name, this presents little added value compared to operating on a "typed" blobId.

The only use of information encoded by the blobId to a useful process is to my knowledge the GC to leverage a grace period cf

.flatMap(blobId -> Mono.fromCallable(() -> blobIdFactory.parse(blobId.asString())))

Also my concern about "typed" BlobId is that it interoperates badly.

The beauty of the proposed design here (PlainBlobId factory) is that we naturally handle historical data without needing complex fallback logic for information that is not actually needed. The BlobMailRepository thing was newly contributed code and did not need to care about historical data. Here the vault needs to deal with data that does not fit the expected TimePrefixedBlobIdFactory formalism.

So how do we deal with this:

  • optional field in the blob id so that temporal info can go missing or be incorrect ?
  • TimePrefixedBlobIdFactory actually fallback to PlainBlobId when temporal info is missing or incorrect ?
  • Or as proposed here because we do not need that info to act on we just avoid writing itchy code and proceed with PlainBlobId everywhere ?

Cheers.

public static final String BLOB_ID_GENERATING_FORMAT = "%d/%02d/%s/%s/%s";

/**
 * Generates a fresh blob id whose serialized form is prefixed with the current
 * year and month taken from the supplied {@link Clock}.
 *
 * <p>The resulting id has the shape {@code <year>/<month>/<r1>/<r2>/<uuid>}
 * (see {@code BLOB_ID_GENERATING_FORMAT}), where {@code r1} and {@code r2} are
 * single random alphanumeric characters used to spread ids across key prefixes,
 * and the trailing UUID guarantees uniqueness. Randomness is non-cryptographic.
 *
 * @param clock the clock providing the current date for the year/month prefix
 * @return a new {@link PlainBlobId} with a time-prefixed identifier
 */
static BlobId currentBlobId(Clock clock) {
ZonedDateTime currentTime = ZonedDateTime.now(clock);
String firstRandomizer = RandomStringUtils.insecure().nextAlphanumeric(1);
String secondRandomizer = RandomStringUtils.insecure().nextAlphanumeric(1);

String id = String.format(BLOB_ID_GENERATING_FORMAT,
    currentTime.getYear(),
    currentTime.getMonthValue(),
    firstRandomizer,
    secondRandomizer,
    UUID.randomUUID());
return new PlainBlobId(id);
}
}
Loading