From 4b8455f2e4e5572764b93d1bc7f81cda798a7f81 Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Mon, 23 Feb 2026 17:50:10 +1100 Subject: [PATCH 1/7] Storage tiering. Add storage policy model --- .../hdds/protocol/OzoneStoragePolicy.java | 111 ++++++++++++++++++ .../apache/hadoop/ozone/om/OMConfigKeys.java | 13 ++ .../hdds/protocol/TestOzoneStoragePolicy.java | 92 +++++++++++++++ .../src/main/proto/OmClientProtocol.proto | 7 ++ .../apache/hadoop/ozone/om/OzoneManager.java | 10 ++ 5 files changed, 233 insertions(+) create mode 100644 hadoop-ozone/common/src/main/java/org/apache/hadoop/hdds/protocol/OzoneStoragePolicy.java create mode 100644 hadoop-ozone/common/src/test/java/org/apache/hadoop/hdds/protocol/TestOzoneStoragePolicy.java diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/hdds/protocol/OzoneStoragePolicy.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/hdds/protocol/OzoneStoragePolicy.java new file mode 100644 index 000000000000..2bb3b2c981ab --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/hdds/protocol/OzoneStoragePolicy.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.protocol; + +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.StoragePolicyProto; + +/** + * Enum representing named storage policies that map semantic intent + * (HOT, WARM, COLD) to physical {@link StorageType} values. + * + * + */ +public enum OzoneStoragePolicy { + + HOT(StorageType.SSD, StorageType.DISK), + WARM(StorageType.DISK, null), + COLD(StorageType.ARCHIVE, null); + + private final StorageType primaryType; + private final StorageType fallbackType; + + OzoneStoragePolicy(StorageType primaryType, StorageType fallbackType) { + this.primaryType = primaryType; + this.fallbackType = fallbackType; + } + + public StorageType getPrimaryStorageType() { + return primaryType; + } + + /** + * Returns the fallback storage type, or {@code null} if no fallback + * is available (write fails when primary is unavailable). + */ + public StorageType getFallbackStorageType() { + return fallbackType; + } + + public static OzoneStoragePolicy getDefault() { + return WARM; + } + + public StoragePolicyProto toProto() { + switch (this) { + case HOT: + return StoragePolicyProto.HOT; + case WARM: + return StoragePolicyProto.WARM; + case COLD: + return StoragePolicyProto.COLD; + default: + throw new IllegalStateException( + "BUG: OzoneStoragePolicy not found, policy=" + this); + } + } + + /** + * Converts a protobuf {@link StoragePolicyProto} to the corresponding + * {@link OzoneStoragePolicy}. Returns {@code null} for + * {@link StoragePolicyProto#STORAGE_POLICY_UNSET}, which means + * "not set / inherit from parent". 
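+   *
+   * <p>Round-trip sketch (illustrative, using only names defined in this
+   * patch):
+   * <pre>{@code
+   * OzoneStoragePolicy p = OzoneStoragePolicy.fromProto(StoragePolicyProto.COLD);
+   * // p == COLD, and p.toProto() == StoragePolicyProto.COLD
+   * }</pre>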
+ */ + public static OzoneStoragePolicy fromProto(StoragePolicyProto proto) { + if (proto == null) { + return null; + } + switch (proto) { + case HOT: + return HOT; + case WARM: + return WARM; + case COLD: + return COLD; + case STORAGE_POLICY_UNSET: + return null; + default: + throw new IllegalStateException( + "BUG: StoragePolicyProto not found, proto=" + proto); + } + } + + /** + * Case-insensitive parse from string. Intended for CLI / config parsing. + * + * @param policy the policy name (e.g. "hot", "WARM") + * @return the matching {@link OzoneStoragePolicy} + * @throws IllegalArgumentException if the string does not match any policy + */ + public static OzoneStoragePolicy fromString(String policy) { + return valueOf(policy.toUpperCase()); + } +} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index b831af6d9c0c..a2fc4cb7b60f 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -20,6 +20,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.ratis.util.TimeDuration; @@ -373,6 +374,18 @@ public final class OMConfigKeys { public static final String OZONE_BUCKET_LAYOUT_OBJECT_STORE = BucketLayout.OBJECT_STORE.name(); + // Storage policy tiering feature toggle. + public static final String OZONE_STORAGE_POLICY_ENABLED = + "ozone.storage.policy.enabled"; + public static final boolean OZONE_STORAGE_POLICY_ENABLED_DEFAULT = false; + + // Default storage policy used by Ozone Manager when a client does not + // specify a storage policy. + public static final String OZONE_DEFAULT_STORAGE_POLICY = + "ozone.default.storage.policy"; + public static final String OZONE_DEFAULT_STORAGE_POLICY_DEFAULT = + OzoneStoragePolicy.WARM.name(); + /** * Configuration properties for Directory Deleting Service. */ diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/hdds/protocol/TestOzoneStoragePolicy.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/hdds/protocol/TestOzoneStoragePolicy.java new file mode 100644 index 000000000000..09c3b2259a3e --- /dev/null +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/hdds/protocol/TestOzoneStoragePolicy.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.protocol; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.StoragePolicyProto; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link OzoneStoragePolicy}. + */ +public class TestOzoneStoragePolicy { + + @Test + public void defaultPolicyIsWarm() { + assertEquals(OzoneStoragePolicy.WARM, OzoneStoragePolicy.getDefault()); + } + + @Test + public void primaryStorageTypes() { + assertEquals(StorageType.SSD, + OzoneStoragePolicy.HOT.getPrimaryStorageType()); + assertEquals(StorageType.DISK, + OzoneStoragePolicy.WARM.getPrimaryStorageType()); + assertEquals(StorageType.ARCHIVE, + OzoneStoragePolicy.COLD.getPrimaryStorageType()); + } + + @Test + public void fallbackStorageTypes() { + assertEquals(StorageType.DISK, + OzoneStoragePolicy.HOT.getFallbackStorageType()); + assertNull(OzoneStoragePolicy.WARM.getFallbackStorageType()); + assertNull(OzoneStoragePolicy.COLD.getFallbackStorageType()); + } + + @Test + public void toProtoRoundTrip() { + for (OzoneStoragePolicy policy : OzoneStoragePolicy.values()) { + StoragePolicyProto proto = policy.toProto(); + OzoneStoragePolicy recovered = OzoneStoragePolicy.fromProto(proto); + assertEquals(policy, recovered); + } + } + + @Test + public void fromProtoUnsetReturnsNull() { + assertNull(OzoneStoragePolicy.fromProto( + StoragePolicyProto.STORAGE_POLICY_UNSET)); + } + + @Test + public void fromProtoNullReturnsNull() { + assertNull(OzoneStoragePolicy.fromProto(null)); + } + + @Test + public void fromStringCaseInsensitive() { + assertEquals(OzoneStoragePolicy.HOT, + OzoneStoragePolicy.fromString("HOT")); + assertEquals(OzoneStoragePolicy.HOT, + OzoneStoragePolicy.fromString("hot")); + assertEquals(OzoneStoragePolicy.WARM, + OzoneStoragePolicy.fromString("Warm")); + assertEquals(OzoneStoragePolicy.COLD, + OzoneStoragePolicy.fromString("cold")); + } + + @Test + public void fromStringInvalidThrows() { + assertThrows(IllegalArgumentException.class, + () -> OzoneStoragePolicy.fromString("INVALID")); + } +} diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index bdb3cc3cee35..f20d9abbcdc2 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -782,6 +782,13 @@ enum BucketLayoutProto { OBJECT_STORE = 3; } +enum StoragePolicyProto { + STORAGE_POLICY_UNSET = 0; + HOT = 1; + WARM = 2; + COLD = 3; +} + /** * Cipher suite. 
*/ diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 7a9d66f86dff..90adb19a74fa 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -53,6 +53,8 @@ import static org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_STORAGE_POLICY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_STORAGE_POLICY_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; @@ -172,6 +174,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.ReconfigurationHandler; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.SecretKeyProtocol; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService; @@ -4780,6 +4783,13 @@ public BucketLayout getOMDefaultBucketLayout() { return this.defaultBucketLayout; } + public OzoneStoragePolicy getDefaultStoragePolicy() { + String policyString = configuration.getTrimmed( + OZONE_DEFAULT_STORAGE_POLICY, + OZONE_DEFAULT_STORAGE_POLICY_DEFAULT); + return OzoneStoragePolicy.fromString(policyString); + } + /** * Create volume which is required for S3Gateway operations. */ From 849f026c2446df5553b956393ab207d65998f711 Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Mon, 23 Feb 2026 18:13:52 +1100 Subject: [PATCH 2/7] Storage tiering. 
Add storage policy for bucket --- .../org/apache/hadoop/ozone/OzoneConsts.java | 1 + .../hadoop/ozone/client/BucketArgs.java | 14 +++++++ .../hadoop/ozone/client/OzoneBucket.java | 14 +++++++ .../hadoop/ozone/client/rpc/RpcClient.java | 7 ++++ .../hadoop/ozone/om/helpers/OmBucketArgs.java | 24 ++++++++++++ .../hadoop/ozone/om/helpers/OmBucketInfo.java | 29 +++++++++++++- .../ozone/om/helpers/TestOmBucketArgs.java | 32 ++++++++++++++++ .../ozone/om/helpers/TestOmBucketInfo.java | 38 +++++++++++++++++++ .../src/main/proto/OmClientProtocol.proto | 2 + .../bucket/OMBucketSetPropertyRequest.java | 7 ++++ 10 files changed, 167 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index 3bd8388f9500..c0f94b4d5533 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -289,6 +289,7 @@ public final class OzoneConsts { public static final String MAX_NUM_OF_BUCKETS = "maxNumOfBuckets"; public static final String HAS_SNAPSHOT = "hasSnapshot"; public static final String STORAGE_TYPE = "storageType"; + public static final String STORAGE_POLICY = "storagePolicy"; public static final String RESOURCE_TYPE = "resourceType"; public static final String IS_VERSION_ENABLED = "isVersionEnabled"; public static final String CREATION_TIME = "creationTime"; diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/BucketArgs.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/BucketArgs.java index f4173963dd6e..4e7814a00e59 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/BucketArgs.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/BucketArgs.java @@ -25,6 +25,7 @@ import java.util.Map; import net.jcip.annotations.Immutable; import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.OzoneConsts; @@ -69,6 +70,8 @@ public final class BucketArgs { private final String owner; + private final OzoneStoragePolicy storagePolicy; + /** * Bucket Layout. */ @@ -86,6 +89,7 @@ private BucketArgs(Builder b) { quotaInNamespace = b.quotaInNamespace; bucketLayout = b.bucketLayout; owner = b.owner; + storagePolicy = b.storagePolicy; defaultReplicationConfig = b.defaultReplicationConfig; } @@ -185,6 +189,10 @@ public String getOwner() { return owner; } + public OzoneStoragePolicy getStoragePolicy() { + return storagePolicy; + } + /** * Builder for OmBucketInfo. 
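   * <p>Intended use under this patch (sketch only; the policy is optional
   * and, when left unset, the OM-side default applies):
   * <pre>{@code
   * BucketArgs args = new BucketArgs.Builder()
   *     .setStoragePolicy(OzoneStoragePolicy.HOT)
   *     .build();
   * }</pre>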
*/ @@ -200,6 +208,7 @@ public static class Builder { private long quotaInNamespace; private BucketLayout bucketLayout; private String owner; + private OzoneStoragePolicy storagePolicy; private DefaultReplicationConfig defaultReplicationConfig; public Builder() { @@ -268,6 +277,11 @@ public BucketArgs.Builder setOwner(String ownerName) { return this; } + public BucketArgs.Builder setStoragePolicy(OzoneStoragePolicy policy) { + storagePolicy = policy; + return this; + } + public BucketArgs.Builder setDefaultReplicationConfig( DefaultReplicationConfig defaultReplConfig) { defaultReplicationConfig = defaultReplConfig; diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java index 566427424223..15e067136943 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.ozone.OmUtils; @@ -152,6 +153,8 @@ public class OzoneBucket extends WithMetadata { */ private String owner; + private OzoneStoragePolicy storagePolicy; + protected OzoneBucket(Builder builder) { super(builder); this.proxy = builder.proxy; @@ -191,6 +194,7 @@ protected OzoneBucket(Builder builder) { this.bucketLayout = builder.bucketLayout; } this.owner = builder.owner; + this.storagePolicy = builder.storagePolicy; } /** @@ -300,6 +304,10 @@ public String getOwner() { return owner; } + public OzoneStoragePolicy getStoragePolicy() { + return storagePolicy; + } + public int getListCacheSize() { return listCacheSize; } @@ -1127,6 +1135,7 @@ public static class Builder extends WithMetadata.Builder { private long quotaInNamespace; private BucketLayout bucketLayout; private String owner; + private OzoneStoragePolicy storagePolicy; protected Builder() { } @@ -1223,6 +1232,11 @@ public Builder setOwner(String owner) { return this; } + public Builder setStoragePolicy(OzoneStoragePolicy storagePolicy) { + this.storagePolicy = storagePolicy; + return this; + } + public OzoneBucket build() { return new OzoneBucket(this); } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index 3947e4b6818b..c56c079c3e4f 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; @@ -657,6 +658,10 @@ public void createBucket( .setBucketLayout(bucketLayout) .setOwner(owner); + if (bucketArgs.getStoragePolicy() != null) { + builder.setStoragePolicy(bucketArgs.getStoragePolicy()); + } + if 
(bucketArgs.getAcls() != null) { builder.acls().addAll(bucketArgs.getAcls()); } @@ -1303,6 +1308,7 @@ public OzoneBucket getBucketDetails( .setQuotaInNamespace(bucketInfo.getQuotaInNamespace()) .setBucketLayout(bucketInfo.getBucketLayout()) .setOwner(bucketInfo.getOwner()) + .setStoragePolicy(bucketInfo.getStoragePolicy()) .setDefaultReplicationConfig(bucketInfo.getDefaultReplicationConfig()) .build(); } @@ -1334,6 +1340,7 @@ public List listBuckets(String volumeName, String bucketPrefix, .setQuotaInNamespace(bucket.getQuotaInNamespace()) .setBucketLayout(bucket.getBucketLayout()) .setOwner(bucket.getOwner()) + .setStoragePolicy(bucket.getStoragePolicy()) .setDefaultReplicationConfig( bucket.getDefaultReplicationConfig()) .build()) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java index 6491a2ec146c..09dc88b97251 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java @@ -21,6 +21,7 @@ import java.util.Map; import java.util.Objects; import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.audit.Auditable; @@ -63,6 +64,8 @@ public final class OmBucketArgs extends WithMetadata implements Auditable { */ private final String ownerName; + private final OzoneStoragePolicy storagePolicy; + private OmBucketArgs(Builder b) { super(b); this.volumeName = b.volumeName; @@ -76,6 +79,7 @@ private OmBucketArgs(Builder b) { this.quotaInNamespaceSet = b.quotaInNamespaceSet; this.quotaInNamespace = quotaInNamespaceSet ? b.quotaInNamespace : OzoneConsts.QUOTA_RESET; this.bekInfo = b.bekInfo; + this.storagePolicy = b.storagePolicy; } /** @@ -160,6 +164,10 @@ public String getOwnerName() { return ownerName; } + public OzoneStoragePolicy getStoragePolicy() { + return storagePolicy; + } + /** * Returns new builder class that builds a OmBucketArgs. * @return Builder @@ -183,6 +191,9 @@ public Map toAuditMap() { if (this.ownerName != null) { auditMap.put(OzoneConsts.OWNER, this.ownerName); } + if (this.storagePolicy != null) { + auditMap.put(OzoneConsts.STORAGE_POLICY, this.storagePolicy.name()); + } if (this.quotaInBytesSet && quotaInBytes > 0 || (this.quotaInBytes != OzoneConsts.QUOTA_RESET)) { auditMap.put(OzoneConsts.QUOTA_IN_BYTES, @@ -222,6 +233,7 @@ public static class Builder extends WithMetadata.Builder { private BucketEncryptionKeyInfo bekInfo; private DefaultReplicationConfig defaultReplicationConfig; private String ownerName; + private OzoneStoragePolicy storagePolicy; /** * Constructs a builder. @@ -288,6 +300,11 @@ public Builder setOwnerName(String owner) { return this; } + public Builder setStoragePolicy(OzoneStoragePolicy policy) { + this.storagePolicy = policy; + return this; + } + /** * Constructs the OmBucketArgs. * @return instance of OmBucketArgs. 
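     * <p>For example (mirrors TestOmBucketArgs later in this patch):
     * <pre>{@code
     * OmBucketArgs args = OmBucketArgs.newBuilder()
     *     .setVolumeName("volume")
     *     .setBucketName("bucket")
     *     .setStoragePolicy(OzoneStoragePolicy.HOT)
     *     .build();
     * }</pre>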
@@ -326,6 +343,9 @@ public BucketArgs getProtobuf() { if (ownerName != null) { builder.setOwnerName(ownerName); } + if (storagePolicy != null) { + builder.setStoragePolicy(storagePolicy.toProto()); + } if (bekInfo != null) { builder.setBekInfo(OMPBHelper.convert(bekInfo)); @@ -353,6 +373,10 @@ public static Builder builderFromProtobuf(BucketArgs bucketArgs) { if (bucketArgs.hasOwnerName()) { builder.setOwnerName(bucketArgs.getOwnerName()); } + if (bucketArgs.hasStoragePolicy()) { + builder.setStoragePolicy( + OzoneStoragePolicy.fromProto(bucketArgs.getStoragePolicy())); + } if (bucketArgs.hasDefaultReplicationConfig()) { builder.setDefaultReplicationConfig( diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java index bce6adb636a0..df1ae3793c4a 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java @@ -24,6 +24,7 @@ import java.util.Objects; import java.util.stream.Collectors; import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.CopyObject; @@ -33,6 +34,7 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.audit.Auditable; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.BucketInfo; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.StoragePolicyProto; import org.apache.hadoop.ozone.protocolPB.OMPBHelper; /** @@ -107,6 +109,8 @@ public final class OmBucketInfo extends WithObjectID implements Auditable, CopyO private final String owner; + private final OzoneStoragePolicy storagePolicy; + private OmBucketInfo(Builder b) { super(b); this.volumeName = b.volumeName; @@ -128,6 +132,7 @@ private OmBucketInfo(Builder b) { this.bucketLayout = b.bucketLayout; this.owner = b.owner; this.defaultReplicationConfig = b.defaultReplicationConfig; + this.storagePolicy = b.storagePolicy; } public static Codec getCodec() { @@ -303,6 +308,10 @@ public String getOwner() { return owner; } + public OzoneStoragePolicy getStoragePolicy() { + return storagePolicy; + } + /** * Returns new builder class that builds a OmBucketInfo. * @@ -341,6 +350,8 @@ public Map toAuditMap() { auditMap.put(OzoneConsts.SNAPSHOT_USED_BYTES, String.valueOf(this.snapshotUsedBytes)); auditMap.put(OzoneConsts.SNAPSHOT_USED_NAMESPACE, String.valueOf(this.snapshotUsedNamespace)); auditMap.put(OzoneConsts.OWNER, this.owner); + auditMap.put(OzoneConsts.STORAGE_POLICY, + (this.storagePolicy != null) ? this.storagePolicy.name() : null); auditMap.put(OzoneConsts.REPLICATION_TYPE, (this.defaultReplicationConfig != null) ? 
String.valueOf(this.defaultReplicationConfig.getType()) : null); @@ -378,7 +389,8 @@ public Builder toBuilder() { .setSnapshotUsedNamespace(snapshotUsedNamespace) .setBucketLayout(bucketLayout) .setOwner(owner) - .setDefaultReplicationConfig(defaultReplicationConfig); + .setDefaultReplicationConfig(defaultReplicationConfig) + .setStoragePolicy(storagePolicy); } /** @@ -401,6 +413,7 @@ public static class Builder extends WithObjectID.Builder { private long quotaInNamespace = OzoneConsts.QUOTA_RESET; private BucketLayout bucketLayout = BucketLayout.DEFAULT; private String owner; + private OzoneStoragePolicy storagePolicy; private DefaultReplicationConfig defaultReplicationConfig; private long snapshotUsedBytes; private long snapshotUsedNamespace; @@ -544,6 +557,11 @@ public Builder setOwner(String ownerName) { return this; } + public Builder setStoragePolicy(OzoneStoragePolicy policy) { + this.storagePolicy = policy; + return this; + } + public Builder setDefaultReplicationConfig( DefaultReplicationConfig defaultReplConfig) { this.defaultReplicationConfig = defaultReplConfig; @@ -604,6 +622,9 @@ public BucketInfo getProtobuf() { if (owner != null) { bib.setOwner(owner); } + if (storagePolicy != null) { + bib.setStoragePolicy(storagePolicy.toProto()); + } return bib.build(); } @@ -673,6 +694,11 @@ public static Builder builderFromProtobuf(BucketInfo bucketInfo, if (bucketInfo.hasOwner()) { obib.setOwner(bucketInfo.getOwner()); } + if (bucketInfo.hasStoragePolicy() + && bucketInfo.getStoragePolicy() != StoragePolicyProto.STORAGE_POLICY_UNSET) { + obib.setStoragePolicy( + OzoneStoragePolicy.fromProto(bucketInfo.getStoragePolicy())); + } return obib; } @@ -745,6 +771,7 @@ public boolean equals(Object o) { Objects.equals(getMetadata(), that.getMetadata()) && Objects.equals(bekInfo, that.bekInfo) && Objects.equals(owner, that.owner) && + Objects.equals(storagePolicy, that.storagePolicy) && Objects.equals(defaultReplicationConfig, that.defaultReplicationConfig); } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java index 147255b3b573..141a13100600 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.client.DefaultReplicationConfig; import org.apache.hadoop.hdds.client.ECReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.junit.jupiter.api.Test; /** @@ -90,4 +91,35 @@ public void testDefaultReplicationConfigIsSetCorrectly() { assertEquals(EC, argsFromProto.getDefaultReplicationConfig().getType()); } + + @Test + public void testStoragePolicyRoundTrip() { + OmBucketArgs bucketArgs = OmBucketArgs.newBuilder() + .setBucketName("bucket") + .setVolumeName("volume") + .setStoragePolicy(OzoneStoragePolicy.HOT) + .build(); + + assertEquals(OzoneStoragePolicy.HOT, bucketArgs.getStoragePolicy()); + + OmBucketArgs argsFromProto = OmBucketArgs.getFromProtobuf( + bucketArgs.getProtobuf()); + + assertEquals(OzoneStoragePolicy.HOT, argsFromProto.getStoragePolicy()); + } + + @Test + public void testStoragePolicyNullByDefault() { + OmBucketArgs bucketArgs = OmBucketArgs.newBuilder() + .setBucketName("bucket") + .setVolumeName("volume") + .build(); + + assertNull(bucketArgs.getStoragePolicy()); + + OmBucketArgs argsFromProto = 
OmBucketArgs.getFromProtobuf( + bucketArgs.getProtobuf()); + + assertNull(argsFromProto.getStoragePolicy()); + } } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketInfo.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketInfo.java index 857103a20c0d..e578f49c48bd 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketInfo.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketInfo.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationType; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; @@ -147,4 +148,41 @@ public void getProtobufMessageEC() { recovered.getDefaultReplicationConfig().getReplicationConfig(); assertEquals(new ECReplicationConfig(3, 2), config); } + + @Test + public void protobufConversionWithStoragePolicy() { + OmBucketInfo bucket = OmBucketInfo.newBuilder() + .setBucketName("bucket") + .setVolumeName("vol1") + .setCreationTime(1L) + .setIsVersionEnabled(false) + .setStorageType(StorageType.ARCHIVE) + .setStoragePolicy(OzoneStoragePolicy.COLD) + .build(); + + assertEquals(OzoneStoragePolicy.COLD, bucket.getStoragePolicy()); + + OmBucketInfo recovered = + OmBucketInfo.getFromProtobuf(bucket.getProtobuf()); + assertEquals(bucket, recovered); + assertEquals(OzoneStoragePolicy.COLD, recovered.getStoragePolicy()); + } + + @Test + public void protobufConversionWithoutStoragePolicy() { + OmBucketInfo bucket = OmBucketInfo.newBuilder() + .setBucketName("bucket") + .setVolumeName("vol1") + .setCreationTime(1L) + .setIsVersionEnabled(false) + .setStorageType(StorageType.ARCHIVE) + .build(); + + assertNull(bucket.getStoragePolicy()); + + OmBucketInfo recovered = + OmBucketInfo.getFromProtobuf(bucket.getProtobuf()); + assertEquals(bucket, recovered); + assertNull(recovered.getStoragePolicy()); + } } diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index f20d9abbcdc2..531c72146f11 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -774,6 +774,7 @@ message BucketInfo { optional hadoop.hdds.DefaultReplicationConfig defaultReplicationConfig = 20; optional uint64 snapshotUsedBytes = 21; optional uint64 snapshotUsedNamespace = 22; + optional StoragePolicyProto storagePolicy = 23; } enum BucketLayoutProto { @@ -854,6 +855,7 @@ message BucketArgs { optional string ownerName = 10; optional hadoop.hdds.DefaultReplicationConfig defaultReplicationConfig = 11; optional BucketEncryptionInfoProto bekInfo = 12; + optional StoragePolicyProto storagePolicy = 13; } message PrefixInfo { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java index a88e5fb73334..a3b4e3be745d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java @@ -25,6 +25,7 @@ import java.util.Objects; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; @@ -172,6 +173,12 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut bucketName, volumeName); } + //Check StoragePolicy to update + OzoneStoragePolicy storagePolicy = omBucketArgs.getStoragePolicy(); + if (storagePolicy != null) { + bucketInfoBuilder.setStoragePolicy(storagePolicy); + } + //Check Versioning to update Boolean versioning = omBucketArgs.getIsVersionEnabled(); if (versioning != null) { From 6512c9512de57c8014dba1aa8bb53e64781a9aad Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Mon, 23 Feb 2026 18:35:42 +1100 Subject: [PATCH 3/7] Storage tiering. Resolve policy at write time --- .../om/request/file/OMFileCreateRequest.java | 7 ++++++- .../om/request/key/OMAllocateBlockRequest.java | 8 +++++++- .../ozone/om/request/key/OMKeyCreateRequest.java | 7 ++++++- .../hadoop/ozone/om/request/key/OMKeyRequest.java | 15 ++++++++++++++- .../ozone/om/request/key/TestOMKeyRequest.java | 2 ++ 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java index 9788cfbafe17..fcaaf34ff72b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java @@ -32,6 +32,7 @@ import java.util.Objects; import java.util.stream.Collectors; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.utils.UniqueId; @@ -121,6 +122,9 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { bucketInfo.getDefaultReplicationConfig(), ozoneManager); + final StorageType storageType = resolveEffectiveStoragePolicy( + bucketInfo, ozoneManager).getPrimaryStorageType(); + // TODO: Here we are allocating block with out any check for // bucket/key/volume or not and also with out any authorization checks. 
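    //
    // Tiering note (this patch, illustrative): storageType above resolves as
    //   resolveEffectiveStoragePolicy(bucketInfo, ozoneManager)
    //       .getPrimaryStorageType()
    // i.e. the bucket's policy when set, otherwise the OM-wide default
    // (WARM by default, whose primary type is DISK).
    //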
// We also allocate block even if requestedSize is 0 because unlike @@ -138,7 +142,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { ozoneManager.getOMServiceId(), ozoneManager.getMetrics(), keyArgs.getSortDatanodes(), - userInfo); + userInfo, + storageType); KeyArgs.Builder newKeyArgs = keyArgs.toBuilder() .setModificationTime(Time.now()).setType(type).setFactor(factor) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java index b692cf9d55eb..7b31f41c392a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Objects; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; @@ -107,6 +108,10 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { UserInfo userInfo = getUserIfNotExists(ozoneManager); ReplicationConfig repConfig = ReplicationConfig.fromProto(keyArgs.getType(), keyArgs.getFactor(), keyArgs.getEcReplicationConfig()); + final OmBucketInfo bucketInfo = ozoneManager + .getBucketInfo(keyArgs.getVolumeName(), keyArgs.getBucketName()); + final StorageType storageType = resolveEffectiveStoragePolicy( + bucketInfo, ozoneManager).getPrimaryStorageType(); // To allocate atleast one block passing requested size and scmBlockSize // as same value. When allocating block requested size is same as // scmBlockSize. @@ -117,7 +122,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { ozoneManager.getPreallocateBlocksMax(), ozoneManager.isGrpcBlockTokenEnabled(), ozoneManager.getOMServiceId(), ozoneManager.getMetrics(), - keyArgs.getSortDatanodes(), userInfo); + keyArgs.getSortDatanodes(), userInfo, + storageType); // Set modification time and normalize key if required. 
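    // (Aside, illustrative: the storageType passed to allocateBlock above
    // follows the same bucket-policy-or-OM-default resolution as in
    // OMFileCreateRequest and OMKeyCreateRequest.)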
KeyArgs.Builder newKeyArgs = diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java index d34320ecb8db..ada368235a40 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java @@ -32,6 +32,7 @@ import java.util.Objects; import java.util.stream.Collectors; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.utils.UniqueId; @@ -141,6 +142,9 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { bucketInfo.getDefaultReplicationConfig(), ozoneManager); + final StorageType storageType = resolveEffectiveStoragePolicy( + bucketInfo, ozoneManager).getPrimaryStorageType(); + // TODO: Here we are allocating block with out any check for // bucket/key/volume or not and also with out any authorization checks. // As for a client for the first time this can be executed on any OM, @@ -163,7 +167,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { ozoneManager.getOMServiceId(), ozoneManager.getMetrics(), keyArgs.getSortDatanodes(), - userInfo)); + userInfo, + storageType)); effectiveDataSize = requestedSize; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java index 7df2619e9e46..afaa3d197eb3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java @@ -55,6 +55,8 @@ import org.apache.hadoop.hdds.client.ContainerBlockID; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; @@ -190,7 +192,8 @@ protected List< OmKeyLocationInfo > allocateBlock(ScmClient scmClient, ReplicationConfig replicationConfig, ExcludeList excludeList, long requestedSize, long scmBlockSize, int preallocateBlocksMax, boolean grpcBlockTokenEnabled, String serviceID, OMMetrics omMetrics, - boolean shouldSortDatanodes, UserInfo userInfo) + boolean shouldSortDatanodes, UserInfo userInfo, + StorageType storageType) throws IOException { int dataGroupSize = replicationConfig instanceof ECReplicationConfig ? 
((ECReplicationConfig) replicationConfig).getData() : 1; @@ -204,6 +207,8 @@ protected List< OmKeyLocationInfo > allocateBlock(ScmClient scmClient, List locationInfos = new ArrayList<>(numBlocks); String remoteUser = getRemoteUser().getShortUserName(); + LOG.debug("Allocating block with storageType={} for replication={}", + storageType, replicationConfig); List allocatedBlocks; try { allocatedBlocks = scmClient.getBlockClient() @@ -234,6 +239,14 @@ protected List< OmKeyLocationInfo > allocateBlock(ScmClient scmClient, return locationInfos; } + protected static OzoneStoragePolicy resolveEffectiveStoragePolicy( + @Nullable OmBucketInfo bucketInfo, OzoneManager ozoneManager) { + if (bucketInfo != null && bucketInfo.getStoragePolicy() != null) { + return bucketInfo.getStoragePolicy(); + } + return ozoneManager.getDefaultStoragePolicy(); + } + /* Optimize ugi lookup for RPC operations to avoid a trip through * UGI.getCurrentUser which is synch'ed. */ diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index b84294370c58..45842547934c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -39,6 +39,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.client.ContainerBlockID; import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -164,6 +165,7 @@ public void setup() throws Exception { .thenReturn(true); when(ozoneManager.getBucketInfo(anyString(), anyString())).thenReturn( new OmBucketInfo.Builder().setVolumeName("").setBucketName("").build()); + when(ozoneManager.getDefaultStoragePolicy()).thenReturn(OzoneStoragePolicy.WARM); doNothing().when(auditLogger).logWrite(any(AuditMessage.class)); AuditMessage mockAuditMessage = mock(AuditMessage.class); From f304a251aa8bcda32d711cb82d4a6ab49d714c36 Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Mon, 23 Feb 2026 19:14:14 +1100 Subject: [PATCH 4/7] Storage tiering. 
Pass storage type to SCM --- .../hadoop/hdds/protocol/StorageType.java | 0 .../protocol/ScmBlockLocationProtocol.java | 13 ++- ...ocationProtocolClientSideTranslatorPB.java | 8 +- .../src/main/proto/ScmServerProtocol.proto | 1 + .../hadoop/hdds/scm/block/BlockManager.java | 5 +- .../hdds/scm/block/BlockManagerImpl.java | 6 +- ...ocationProtocolServerSideTranslatorPB.java | 8 +- .../scm/server/SCMBlockProtocolServer.java | 8 +- .../hdds/scm/block/TestBlockManager.java | 35 ++++--- .../server/TestSCMBlockProtocolServer.java | 92 ++++++++++++++++++- .../ozone/om/request/key/OMKeyRequest.java | 2 +- .../om/ScmBlockLocationTestingClient.java | 4 +- 12 files changed, 154 insertions(+), 28 deletions(-) rename {hadoop-ozone => hadoop-hdds}/common/src/main/java/org/apache/hadoop/hdds/protocol/StorageType.java (100%) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/hdds/protocol/StorageType.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/StorageType.java similarity index 100% rename from hadoop-ozone/common/src/main/java/org/apache/hadoop/hdds/protocol/StorageType.java rename to hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/StorageType.java diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocol.java index a34420b3de00..86a407bb6b94 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocol.java @@ -23,6 +23,7 @@ import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.scm.AddSCMRequest; @@ -87,7 +88,7 @@ default List allocateBlock(long size, int numBlocks, ReplicationConfig replicationConfig, String owner, ExcludeList excludeList) throws IOException { return allocateBlock(size, numBlocks, replicationConfig, owner, - excludeList, null); + excludeList, null, StorageType.DEFAULT); } /** @@ -107,9 +108,17 @@ default List allocateBlock(long size, int numBlocks, * @return allocated block accessing info (key, pipeline). * @throws IOException */ + default List allocateBlock(long size, int numBlocks, + ReplicationConfig replicationConfig, String owner, + ExcludeList excludeList, String clientMachine) throws IOException { + return allocateBlock(size, numBlocks, replicationConfig, owner, + excludeList, clientMachine, StorageType.DEFAULT); + } + List allocateBlock(long size, int numBlocks, ReplicationConfig replicationConfig, String owner, - ExcludeList excludeList, String clientMachine) throws IOException; + ExcludeList excludeList, String clientMachine, + StorageType storageType) throws IOException; /** * Delete blocks for a set of object keys. 
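   *
   * <p>Caller sketch for the widened overload above; {@code scmBlockClient}
   * stands for any {@link ScmBlockLocationProtocol} instance, and passing
   * {@code StorageType.DEFAULT} keeps pre-tiering behaviour:
   * <pre>{@code
   * List<AllocatedBlock> blocks = scmBlockClient.allocateBlock(
   *     size, 1, replicationConfig, owner,
   *     new ExcludeList(), clientMachine, StorageType.SSD);
   * }</pre>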
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java index ae4d58fd18f4..5fb1eaac0af3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateBlockResponse; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockRequestProto; @@ -173,7 +174,8 @@ public List allocateBlock( long size, int num, ReplicationConfig replicationConfig, String owner, ExcludeList excludeList, - String clientMachine + String clientMachine, + StorageType storageType ) throws IOException { Preconditions.checkArgument(size > 0, "block size must be greater than 0"); @@ -189,6 +191,10 @@ public List allocateBlock( requestBuilder.setClient(clientMachine); } + if (storageType != null) { + requestBuilder.setStorageType(storageType.toProto()); + } + switch (replicationConfig.getReplicationType()) { case STAND_ALONE: requestBuilder.setFactor( diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index 4c794fe7dc18..a26e8acaeeca 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -161,6 +161,7 @@ message AllocateScmBlockRequestProto { optional string client = 9; + optional hadoop.hdds.StorageTypeProto storageType = 10; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManager.java index 54a1648f7c16..6ae0b9a00e84 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManager.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.ozone.common.BlockGroup; @@ -42,8 +43,8 @@ public interface BlockManager extends Closeable { * @throws IOException */ AllocatedBlock allocateBlock(long size, ReplicationConfig replicationConfig, - String owner, - ExcludeList excludeList) throws IOException, TimeoutException; + String owner, ExcludeList excludeList, + StorageType storageType) throws IOException, TimeoutException; /** * Deletes a list of blocks in an atomic operation. 
Internally, SCM diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java index 6b0136abf664..9c375a6cec41 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.client.ContainerBlockID; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.scm.ScmConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; @@ -145,11 +146,14 @@ public void stop() throws IOException { @Override public AllocatedBlock allocateBlock(final long size, ReplicationConfig replicationConfig, - String owner, ExcludeList excludeList) + String owner, ExcludeList excludeList, + StorageType storageType) throws IOException { if (LOG.isTraceEnabled()) { LOG.trace("Size : {} , replicationConfig: {}", size, replicationConfig); } + LOG.debug("Allocating block: size={}, replication={}, storageType={}", + size, replicationConfig, storageType); if (scm.getScmContext().isInSafeMode()) { throw new SCMException("SafeModePrecheck failed for allocateBlock", SCMException.ResultCodes.SAFE_MODE_EXCEPTION); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java index f9fa80bf42d5..981475c52e61 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java @@ -24,6 +24,7 @@ import java.util.stream.Collectors; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -193,6 +194,10 @@ private Status exceptionToResponseStatus(IOException ex) { public AllocateScmBlockResponseProto allocateScmBlock( AllocateScmBlockRequestProto request, int clientVersion) throws IOException { + StorageType storageType = request.hasStorageType() + ? 
StorageType.valueOf(request.getStorageType()) + : StorageType.DEFAULT; + List allocatedBlocks = impl.allocateBlock(request.getSize(), request.getNumBlocks(), @@ -202,7 +207,8 @@ public AllocateScmBlockResponseProto allocateScmBlock( request.getEcReplicationConfig()), request.getOwner(), ExcludeList.getFromProtoBuf(request.getExcludeList()), - request.getClient()); + request.getClient(), + storageType); AllocateScmBlockResponseProto.Builder builder = AllocateScmBlockResponseProto.newBuilder(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 1fc7c2d96b24..7a377b419c2c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; import org.apache.hadoop.hdds.scm.AddSCMRequest; @@ -189,7 +190,8 @@ public List allocateBlock( long size, int num, ReplicationConfig replicationConfig, String owner, ExcludeList excludeList, - String clientMachine + String clientMachine, + StorageType storageType ) throws IOException { long startNanos = Time.monotonicNowNanos(); Map auditMap = Maps.newHashMap(); @@ -198,6 +200,7 @@ public List allocateBlock( auditMap.put("replication", replicationConfig.toString()); auditMap.put("owner", owner); auditMap.put("client", clientMachine); + auditMap.put("storageType", String.valueOf(storageType)); List blocks = new ArrayList<>(num); if (LOG.isDebugEnabled()) { @@ -207,7 +210,8 @@ public List allocateBlock( try { for (int i = 0; i < num; i++) { AllocatedBlock block = scm.getScmBlockManager() - .allocateBlock(size, replicationConfig, owner, excludeList); + .allocateBlock(size, replicationConfig, owner, excludeList, + storageType); if (block != null) { // Sort the datanodes if client machine is specified final Node client = getClientNode(clientMachine); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java index 45c947cb00a4..70e5b53c3d59 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; @@ -197,7 +198,8 @@ public void testAllocateBlock() throws Exception { pipelineManager.createPipeline(replicationConfig); HddsTestUtils.openAllRatisPipelines(pipelineManager); AllocatedBlock block = blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, - replicationConfig, OzoneConsts.OZONE, new ExcludeList()); + 
replicationConfig, OzoneConsts.OZONE, new ExcludeList(), + StorageType.DEFAULT); assertNotNull(block); } @@ -216,7 +218,7 @@ public void testAllocateBlockWithExclusion() throws Exception { .get(0).getId()); AllocatedBlock block = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - excludeList); + excludeList, StorageType.DEFAULT); assertNotNull(block); for (PipelineID id : excludeList.getPipelineIds()) { assertNotEquals(block.getPipeline().getId(), id); @@ -227,7 +229,7 @@ public void testAllocateBlockWithExclusion() throws Exception { } block = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - excludeList); + excludeList, StorageType.DEFAULT); assertNotNull(block); assertThat(excludeList.getPipelineIds()).contains(block.getPipeline().getId()); } @@ -249,7 +251,7 @@ void testAllocateBlockInParallel() throws Exception { future.complete(blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList())); + new ExcludeList(), StorageType.DEFAULT)); } catch (IOException e) { future.completeExceptionally(e); } @@ -287,7 +289,7 @@ void testBlockDistribution() throws Exception { AllocatedBlock block = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); long containerId = block.getBlockID().getContainerID(); if (!allocatedBlockMap.containsKey(containerId)) { blockList = new ArrayList<>(); @@ -343,7 +345,7 @@ void testBlockDistributionWithMultipleDisks() throws Exception { AllocatedBlock block = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); long containerId = block.getBlockID().getContainerID(); if (!allocatedBlockMap.containsKey(containerId)) { blockList = new ArrayList<>(); @@ -403,7 +405,7 @@ void testBlockDistributionWithMultipleRaftLogDisks() throws Exception { AllocatedBlock block = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); long containerId = block.getBlockID().getContainerID(); if (!allocatedBlockMap.containsKey(containerId)) { blockList = new ArrayList<>(); @@ -439,7 +441,8 @@ public void testAllocateOversizedBlock() { long size = 6 * GB; Throwable t = assertThrows(IOException.class, () -> blockManager.allocateBlock(size, - replicationConfig, OzoneConsts.OZONE, new ExcludeList())); + replicationConfig, OzoneConsts.OZONE, new ExcludeList(), + StorageType.DEFAULT)); assertEquals("Unsupported block size: " + size, t.getMessage()); } @@ -450,7 +453,8 @@ public void testAllocateBlockFailureInSafeMode() { // Test1: In safe mode expect an SCMException. Throwable t = assertThrows(IOException.class, () -> blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, - replicationConfig, OzoneConsts.OZONE, new ExcludeList())); + replicationConfig, OzoneConsts.OZONE, new ExcludeList(), + StorageType.DEFAULT)); assertEquals("SafeModePrecheck failed for allocateBlock", t.getMessage()); } @@ -459,7 +463,8 @@ public void testAllocateBlockFailureInSafeMode() { public void testAllocateBlockSucInSafeMode() throws Exception { // Test2: Exit safe mode and then try allocateBock again. 
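    // (Illustrative note: these tests thread StorageType.DEFAULT through the
    // new allocateBlock parameter, so pre-tiering placement behaviour is
    // what gets exercised here.)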
assertNotNull(blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, - replicationConfig, OzoneConsts.OZONE, new ExcludeList())); + replicationConfig, OzoneConsts.OZONE, new ExcludeList(), + StorageType.DEFAULT)); } @Test @@ -472,14 +477,14 @@ public void testMultipleBlockAllocation() AllocatedBlock allocatedBlock = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); // block should be allocated in different pipelines GenericTestUtils.waitFor(() -> { try { AllocatedBlock block = blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); return !block.getPipeline().getId() .equals(allocatedBlock.getPipeline().getId()); } catch (IOException e) { @@ -525,7 +530,7 @@ public void testMultipleBlockAllocationWithClosedContainer() blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); } catch (IOException e) { } return verifyNumberOfContainersInPipelines( @@ -550,7 +555,7 @@ public void testMultipleBlockAllocationWithClosedContainer() blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList()); + new ExcludeList(), StorageType.DEFAULT); } catch (IOException e) { } return verifyNumberOfContainersInPipelines( @@ -567,7 +572,7 @@ public void testBlockAllocationWithNoAvailablePipelines() assertEquals(0, pipelineManager.getPipelines(replicationConfig).size()); assertNotNull(blockManager .allocateBlock(DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, - new ExcludeList())); + new ExcludeList(), StorageType.DEFAULT)); } private class DatanodeCommandHandler implements diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java index 895baef27d6c..da13da4cd9b7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java @@ -45,8 +45,12 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.StorageTypeProto; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockRequestProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto; import org.apache.hadoop.hdds.scm.HddsTestUtils; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.block.BlockManager; import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl; @@ -80,6 +84,7 @@ public class TestSCMBlockProtocolServer { private StorageContainerManager scm; private NodeManager nodeManager; private ScmBlockLocationProtocolServerSideTranslatorPB service; + private BlockManagerStub blockManagerStub; private static final int NODE_COUNT = 10; private static final Map EDGE_NODES = ImmutableMap.of( @@ -90,16 +95,23 @@ public class TestSCMBlockProtocolServer { private static class 
BlockManagerStub implements BlockManager { private final List datanodes; + private volatile StorageType lastStorageType; BlockManagerStub(List datanodes) { assertNotNull(datanodes, "Datanodes cannot be null"); this.datanodes = datanodes; } + StorageType getLastStorageType() { + return lastStorageType; + } + @Override public AllocatedBlock allocateBlock(long size, ReplicationConfig replicationConfig, String owner, - ExcludeList excludeList) throws IOException, TimeoutException { + ExcludeList excludeList, StorageType storageType) + throws IOException, TimeoutException { + this.lastStorageType = storageType; List nodes = new ArrayList<>(datanodes); Collections.shuffle(nodes); Pipeline pipeline; @@ -174,10 +186,11 @@ void setUp(@TempDir File dir) throws Exception { config.set(StaticMapping.KEY_HADOOP_CONFIGURED_NODE_MAPPING, String.join(",", nodeMapping)); + blockManagerStub = new BlockManagerStub(datanodes); SCMConfigurator configurator = new SCMConfigurator(); configurator.setSCMHAManager(SCMHAManagerStub.getInstance(true)); configurator.setScmContext(SCMContext.emptyContext()); - configurator.setScmBlockManager(new BlockManagerStub(datanodes)); + configurator.setScmBlockManager(blockManagerStub); scm = HddsTestUtils.getScm(config, configurator); scm.start(); scm.exitSafeMode(); @@ -338,6 +351,81 @@ void testAllocateBlockWithClientMachine() throws IOException { } } + @Test + void testAllocateBlockPassesStorageType() throws IOException { + final ReplicationConfig replicationConfig = RatisReplicationConfig + .getInstance(ReplicationFactor.THREE); + final long blockSize = 128 * MB; + + server.allocateBlock(blockSize, 1, replicationConfig, "o", + new ExcludeList(), "", StorageType.SSD); + assertEquals(StorageType.SSD, blockManagerStub.getLastStorageType()); + + server.allocateBlock(blockSize, 1, replicationConfig, "o", + new ExcludeList(), "", StorageType.ARCHIVE); + assertEquals(StorageType.ARCHIVE, blockManagerStub.getLastStorageType()); + } + + @Test + void testAllocateBlockDefaultStorageType() throws IOException { + final ReplicationConfig replicationConfig = RatisReplicationConfig + .getInstance(ReplicationFactor.THREE); + final long blockSize = 128 * MB; + + // 6-param overload should default to DISK + server.allocateBlock(blockSize, 1, replicationConfig, "o", + new ExcludeList(), ""); + assertEquals(StorageType.DEFAULT, blockManagerStub.getLastStorageType()); + } + + @Test + void testStorageTypeProtoRoundTrip() throws IOException { + final ReplicationConfig replicationConfig = RatisReplicationConfig + .getInstance(ReplicationFactor.THREE); + final long blockSize = 128 * MB; + + // Build a proto request with storageType = SSD + AllocateScmBlockRequestProto request = AllocateScmBlockRequestProto + .newBuilder() + .setSize(blockSize) + .setNumBlocks(1) + .setType(replicationConfig.getReplicationType()) + .setFactor(ReplicationFactor.THREE) + .setOwner("o") + .setExcludeList(new ExcludeList().getProtoBuf()) + .setStorageType(StorageTypeProto.SSD) + .build(); + + AllocateScmBlockResponseProto response = + service.allocateScmBlock(request, ClientVersion.CURRENT_VERSION); + assertNotNull(response); + assertEquals(1, response.getBlocksCount()); + assertEquals(StorageType.SSD, blockManagerStub.getLastStorageType()); + } + + @Test + void testStorageTypeProtoDefaultWhenUnset() throws IOException { + final ReplicationConfig replicationConfig = RatisReplicationConfig + .getInstance(ReplicationFactor.THREE); + final long blockSize = 128 * MB; + + // Build a proto request WITHOUT storageType + 
AllocateScmBlockRequestProto request = AllocateScmBlockRequestProto + .newBuilder() + .setSize(blockSize) + .setNumBlocks(1) + .setType(replicationConfig.getReplicationType()) + .setFactor(ReplicationFactor.THREE) + .setOwner("o") + .setExcludeList(new ExcludeList().getProtoBuf()) + .build(); + + AllocateScmBlockResponseProto response = + service.allocateScmBlock(request, ClientVersion.CURRENT_VERSION); + assertNotNull(response); + assertEquals(StorageType.DEFAULT, blockManagerStub.getLastStorageType()); + } + private List getNetworkNames() { return nodeManager.getAllNodes().stream() .map(NodeImpl::getNetworkName) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java index afaa3d197eb3..b77cba224022 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java @@ -213,7 +213,7 @@ protected List< OmKeyLocationInfo > allocateBlock(ScmClient scmClient, try { allocatedBlocks = scmClient.getBlockClient() .allocateBlock(scmBlockSize, numBlocks, replicationConfig, serviceID, - excludeList, clientMachine); + excludeList, clientMachine, storageType); } catch (SCMException ex) { omMetrics.incNumBlockAllocateCallFails(); if (ex.getResult() diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java index 823a64052570..aea60509e5ea 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdds.client.ContainerBlockID; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.scm.AddSCMRequest; @@ -119,7 +120,8 @@ public ScmBlockLocationTestingClient(String clusterID, String scmId, @Override public List allocateBlock(long size, int num, ReplicationConfig config, - String owner, ExcludeList excludeList, String clientMachine) + String owner, ExcludeList excludeList, String clientMachine, + StorageType storageType) throws IOException { DatanodeDetails datanodeDetails = randomDatanodeDetails(); Pipeline pipeline = createPipeline(datanodeDetails); From 94a2b644bfbd1499572c97ba1d365cd1ddd5f43a Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Tue, 24 Feb 2026 15:05:02 +1100 Subject: [PATCH 5/7] Storage tiering. 
Add scm pipeline filtering

---
 .../hdds/scm/PipelineRequestInformation.java  |  18 +-
 .../src/main/resources/ozone-default.xml      |  35 +++
 .../hdds/scm/block/BlockManagerImpl.java      |   4 +-
 .../pipeline/PipelineStorageTypeFilter.java   |  74 ++++++
 .../pipeline/WritableContainerFactory.java    |  23 +-
 .../pipeline/WritableContainerProvider.java   |   7 +
 .../pipeline/WritableECContainerProvider.java |  18 ++
 .../WritableRatisContainerProvider.java       |  45 +++-
 ...ocationProtocolServerSideTranslatorPB.java |   2 +-
 .../scm/server/SCMBlockProtocolServer.java    |   2 +-
 .../hdds/scm/block/TestBlockManager.java      |  32 ++-
 .../TestPipelineStorageTypeFilter.java        | 230 +++++++++++++++++
 .../TestWritableECContainerProvider.java      | 239 ++++++++++++++++++
 .../TestWritableRatisContainerProvider.java   |  96 +++++++
 .../server/TestSCMBlockProtocolServer.java    |   2 +-
 .../hadoop/ozone/client/rpc/RpcClient.java    |   1 -
 .../scm/pipeline/TestSCMPipelineMetrics.java  |   3 +-
 .../om/ScmBlockLocationTestingClient.java     |   2 +-
 .../om/request/key/TestOMKeyRequest.java      |   2 +-
 19 files changed, 817 insertions(+), 18 deletions(-)
 create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStorageTypeFilter.java
 create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineStorageTypeFilter.java

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PipelineRequestInformation.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PipelineRequestInformation.java
index bef11eb7365d..4ba99082caa0 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PipelineRequestInformation.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PipelineRequestInformation.java
@@ -17,17 +17,21 @@
 
 package org.apache.hadoop.hdds.scm;
 
+import org.apache.hadoop.hdds.protocol.StorageType;
+
 /**
  * The information of the request of pipeline.
  */
 public final class PipelineRequestInformation {
   private final long size;
+  private final StorageType storageType;
 
   /**
    * Builder for PipelineRequestInformation.
    */
   public static class Builder {
     private long size;
+    private StorageType storageType;
 
     public static Builder getBuilder() {
       return new Builder();
     }
@@ -43,16 +47,26 @@ public Builder setSize(long sz) {
       this.size = sz;
       return this;
     }
 
+    public Builder setStorageType(StorageType st) {
+      this.storageType = st;
+      return this;
+    }
+
     public PipelineRequestInformation build() {
-      return new PipelineRequestInformation(size);
+      return new PipelineRequestInformation(size, storageType);
     }
   }
 
-  private PipelineRequestInformation(long size) {
+  private PipelineRequestInformation(long size, StorageType storageType) {
     this.size = size;
+    this.storageType = storageType;
   }
 
   public long getSize() {
     return size;
   }
+
+  public StorageType getStorageType() {
+    return storageType;
+  }
 }
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index d63dedb15416..2d097237b207 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1693,6 +1693,19 @@
       If enabled, SCM will auto create RATIS factor ONE pipeline.
     </description>
   </property>
+  <property>
+    <name>ozone.scm.pipeline.creation.storage-type-aware.enabled</name>
+    <value>false</value>
+    <tag>OZONE, SCM, PIPELINE</tag>
+    <description>
+      If enabled, the background pipeline creator will proactively create
+      storage-type-constrained pipelines for each StorageType (SSD, DISK,
+      ARCHIVE) in addition to untyped pipelines. This ensures that
+      pipelines suitable for HOT/WARM/COLD storage policies are available
+      when containers need to be allocated. Only enable on clusters that
+      use storage tiering with mixed StorageType datanodes.
+    </description>
+  </property>
   <property>
     <name>hdds.scm.safemode.threshold.pct</name>
     <value>0.99</value>
@@ -4195,6 +4208,28 @@
     </description>
   </property>
 
+  <property>
+    <name>ozone.storage.policy.enabled</name>
+    <value>false</value>
+    <tag>OZONE, MANAGEMENT</tag>
+    <description>
+      When enabled, Ozone Manager will resolve and enforce storage policies
+      (HOT, WARM, COLD, ALL_SSD) on buckets and keys. When disabled, all
+      storage policy metadata is ignored and default placement is used.
+    </description>
+  </property>
+
+  <property>
+    <name>ozone.default.storage.policy</name>
+    <value>WARM</value>
+    <tag>OZONE, MANAGEMENT</tag>
+    <description>
+      Default storage policy used by Ozone Manager when a client does not
+      specify a storage policy. Supported values are HOT, WARM, COLD,
+      ALL_SSD.
+    </description>
+  </property>
+
   <property>
     <name>ozone.client.max.ec.stripe.write.retries</name>
     <value>10</value>
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
index 9c375a6cec41..63bafc131ccc 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
@@ -31,8 +31,8 @@
 import org.apache.hadoop.hdds.client.ContainerBlockID;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
-import org.apache.hadoop.hdds.protocol.StorageType;
 import org.apache.hadoop.hdds.conf.StorageUnit;
+import org.apache.hadoop.hdds.protocol.StorageType;
 import org.apache.hadoop.hdds.scm.ScmConfig;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
@@ -165,7 +165,7 @@ public AllocatedBlock allocateBlock(final long size,
     }
 
     ContainerInfo containerInfo = writableContainerFactory.getContainer(
-        size, replicationConfig, owner, excludeList);
+        size, replicationConfig, owner, excludeList, storageType);
 
     if (containerInfo != null) {
       return newBlock(containerInfo);
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStorageTypeFilter.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStorageTypeFilter.java
new file mode 100644
index 000000000000..92a5949c17e4
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStorageTypeFilter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.pipeline;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.StorageType;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
+import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
+
+/**
+ * Utility for filtering pipelines by StorageType. Builds a set of
+ * qualifying node UUIDs (nodes that have at least one volume of the
+ * requested StorageType), then filters pipelines to those whose every
+ * member is in that set.
+ */
+final class PipelineStorageTypeFilter {
+
+  private PipelineStorageTypeFilter() {
+  }
+
+  static Set<UUID> getNodesWithStorageType(NodeManager nodeManager,
+      StorageType storageType) {
+    Set<UUID> result = new HashSet<>();
+    for (DatanodeDetails dn :
+        nodeManager.getNodes(NodeStatus.inServiceHealthy())) {
+      DatanodeInfo info = nodeManager.getDatanodeInfo(dn);
+      if (info == null) {
+        continue;
+      }
+      for (StorageReportProto report : info.getStorageReports()) {
+        if (StorageType.valueOf(report.getStorageType()) == storageType) {
+          result.add(dn.getUuid());
+          break;
+        }
+      }
+    }
+    return result;
+  }
+
+  static List<Pipeline> filter(List<Pipeline> pipelines,
+      NodeManager nodeManager, StorageType storageType) {
+    if (storageType == null) {
+      return pipelines;
+    }
+    Set<UUID> qualifiedNodes =
+        getNodesWithStorageType(nodeManager, storageType);
+    return pipelines.stream()
+        .filter(p -> p.getNodes().stream()
+            .allMatch(dn -> qualifiedNodes.contains(dn.getUuid())))
+        .collect(Collectors.toList());
+  }
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerFactory.java
index b816bc4de7f4..ba41495bc3d9 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerFactory.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerFactory.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hdds.client.ECReplicationConfig;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.protocol.StorageType;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
 import org.apache.hadoop.hdds.scm.pipeline.WritableECContainerProvider.WritableECContainerProviderConfig;
@@ -45,7 +46,8 @@ public WritableContainerFactory(StorageContainerManager scm) {
     this.ratisProvider = new WritableRatisContainerProvider(
         scm.getPipelineManager(),
-        scm.getContainerManager(), scm.getPipelineChoosePolicy());
+        scm.getContainerManager(), scm.getPipelineChoosePolicy(),
+        scm.getScmNodeManager());
     this.standaloneProvider = ratisProvider;
 
     WritableECContainerProviderConfig ecProviderConfig =
@@ -79,6 +81,25 @@ public ContainerInfo getContainer(final long size,
     }
   }
 
+  public ContainerInfo getContainer(final long size,
+      ReplicationConfig repConfig, String owner, ExcludeList excludeList,
+      StorageType storageType) throws IOException {
+    switch (repConfig.getReplicationType()) {
+    case STAND_ALONE:
+      return standaloneProvider.getContainer(size, repConfig, owner,
+          excludeList,
storageType); + case RATIS: + return ratisProvider.getContainer(size, repConfig, owner, + excludeList, storageType); + case EC: + return ecProvider.getContainer(size, (ECReplicationConfig) repConfig, + owner, excludeList, storageType); + default: + throw new IOException(repConfig.getReplicationType() + + " is an invalid replication type"); + } + } + private long getConfiguredContainerSize(ConfigurationSource conf) { return (long) conf.getStorageSize(OZONE_SCM_CONTAINER_SIZE, OZONE_SCM_CONTAINER_SIZE_DEFAULT, BYTES); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerProvider.java index 8eb3d1233f2e..b5c976ab66ef 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableContainerProvider.java @@ -19,6 +19,7 @@ import java.io.IOException; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; @@ -53,4 +54,10 @@ ContainerInfo getContainer(long size, T repConfig, String owner, ExcludeList excludeList) throws IOException; + default ContainerInfo getContainer(long size, T repConfig, + String owner, ExcludeList excludeList, StorageType storageType) + throws IOException { + return getContainer(size, repConfig, owner, excludeList); + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java index d7c4b7705f46..06f4992fd381 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdds.conf.PostConstruct; import org.apache.hadoop.hdds.conf.ReconfigurableConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.PipelineChoosePolicy; import org.apache.hadoop.hdds.scm.PipelineRequestInformation; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -93,6 +94,20 @@ public WritableECContainerProvider(WritableECContainerProviderConfig config, public ContainerInfo getContainer(final long size, ECReplicationConfig repConfig, String owner, ExcludeList excludeList) throws IOException { + return getContainerInternal(size, repConfig, owner, excludeList, null); + } + + @Override + public ContainerInfo getContainer(final long size, + ECReplicationConfig repConfig, String owner, ExcludeList excludeList, + StorageType storageType) throws IOException { + return getContainerInternal(size, repConfig, owner, excludeList, + storageType); + } + + private ContainerInfo getContainerInternal(final long size, + ECReplicationConfig repConfig, String owner, ExcludeList excludeList, + StorageType storageType) throws IOException { int maximumPipelines = getMaximumPipelines(repConfig); int openPipelineCount; synchronized (this) { @@ -115,12 +130,15 @@ public ContainerInfo getContainer(final long size, } List existingPipelines = pipelineManager.getPipelines( repConfig, 
Pipeline.PipelineState.OPEN); + existingPipelines = PipelineStorageTypeFilter.filter( + existingPipelines, nodeManager, storageType); final int pipelineCount = existingPipelines.size(); LOG.debug("Checking existing pipelines: {}", existingPipelines); PipelineRequestInformation pri = PipelineRequestInformation.Builder.getBuilder() .setSize(size) + .setStorageType(storageType) .build(); while (!existingPipelines.isEmpty()) { int pipelineIndex = diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java index a61b32892352..af03d7d3a1ca 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java @@ -22,12 +22,14 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.PipelineChoosePolicy; import org.apache.hadoop.hdds.scm.PipelineRequestInformation; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,20 +45,44 @@ public class WritableRatisContainerProvider private final PipelineManager pipelineManager; private final PipelineChoosePolicy pipelineChoosePolicy; private final ContainerManager containerManager; + private final NodeManager nodeManager; public WritableRatisContainerProvider( PipelineManager pipelineManager, ContainerManager containerManager, - PipelineChoosePolicy pipelineChoosePolicy) { + PipelineChoosePolicy pipelineChoosePolicy, + NodeManager nodeManager) { this.pipelineManager = pipelineManager; this.containerManager = containerManager; this.pipelineChoosePolicy = pipelineChoosePolicy; + this.nodeManager = nodeManager; + } + + public WritableRatisContainerProvider( + PipelineManager pipelineManager, + ContainerManager containerManager, + PipelineChoosePolicy pipelineChoosePolicy) { + this(pipelineManager, containerManager, pipelineChoosePolicy, null); } @Override public ContainerInfo getContainer(final long size, ReplicationConfig repConfig, String owner, ExcludeList excludeList) throws IOException { + return getContainerInternal(size, repConfig, owner, excludeList, null); + } + + @Override + public ContainerInfo getContainer(final long size, + ReplicationConfig repConfig, String owner, ExcludeList excludeList, + StorageType storageType) throws IOException { + return getContainerInternal(size, repConfig, owner, excludeList, + storageType); + } + + private ContainerInfo getContainerInternal(final long size, + ReplicationConfig repConfig, String owner, ExcludeList excludeList, + StorageType storageType) throws IOException { /* Here is the high level logic. @@ -77,10 +103,13 @@ public ContainerInfo getContainer(final long size, //in downstream managers. 
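+    // Storage tiering: the request built below now records the desired
+    // StorageType, and the pipeline lookup filters OPEN pipelines to
+    // those whose members all report a volume of that type (see
+    // PipelineStorageTypeFilter). A null storageType leaves the list
+    // unfiltered.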
PipelineRequestInformation req = - PipelineRequestInformation.Builder.getBuilder().setSize(size).build(); + PipelineRequestInformation.Builder.getBuilder() + .setSize(size) + .setStorageType(storageType) + .build(); ContainerInfo containerInfo = - getContainer(repConfig, owner, excludeList, req); + getContainer(repConfig, owner, excludeList, req, storageType); if (containerInfo != null) { return containerInfo; } @@ -126,7 +155,8 @@ public ContainerInfo getContainer(final long size, // If Exception occurred or successful creation of pipeline do one // final try to fetch pipelines. - containerInfo = getContainer(repConfig, owner, excludeList, req); + containerInfo = getContainer(repConfig, owner, excludeList, req, + storageType); if (containerInfo != null) { return containerInfo; } @@ -143,7 +173,8 @@ public ContainerInfo getContainer(final long size, @Nullable private ContainerInfo getContainer(ReplicationConfig repConfig, String owner, - ExcludeList excludeList, PipelineRequestInformation req) { + ExcludeList excludeList, PipelineRequestInformation req, + StorageType storageType) { // Acquire pipeline manager lock, to avoid any updates to pipeline // while allocate container happens. This is to avoid scenario like // mentioned in HDDS-5655. @@ -151,6 +182,10 @@ private ContainerInfo getContainer(ReplicationConfig repConfig, String owner, try { List availablePipelines = findPipelinesByState(repConfig, excludeList, Pipeline.PipelineState.OPEN); + if (nodeManager != null) { + availablePipelines = PipelineStorageTypeFilter.filter( + availablePipelines, nodeManager, storageType); + } return selectContainer(availablePipelines, req, owner, excludeList); } finally { pipelineManager.releaseReadLock(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java index 981475c52e61..f30035ff027d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java @@ -24,9 +24,9 @@ import java.util.stream.Collectors; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.ReplicationConfig; -import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateBlockResponse; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 7a377b419c2c..c64e64460a4b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -47,8 +47,8 @@ import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import 
org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; import org.apache.hadoop.hdds.scm.AddSCMRequest; import org.apache.hadoop.hdds.scm.ScmInfo; diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java index 70e5b53c3d59..b40153123a43 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java @@ -44,8 +44,8 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; -import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto; @@ -575,6 +575,36 @@ public void testBlockAllocationWithNoAvailablePipelines() new ExcludeList(), StorageType.DEFAULT)); } + /** + * Integration test: verifies the full BlockManagerImpl → WritableContainerFactory + * → WritableRatisContainerProvider → PipelineStorageTypeFilter chain. + * MockNodeManager reports only DISK volumes. Requesting SSD should fail + * because the filter removes all existing pipelines, and newly created + * pipelines also contain DISK-only nodes so they too get filtered on + * the second attempt. + */ + @Test + public void testAllocateBlockWithNonMatchingStorageTypeFails() + throws Exception { + pipelineManager.createPipeline(replicationConfig); + HddsTestUtils.openAllRatisPipelines(pipelineManager); + + // Verify DISK allocation works (baseline — all nodes report DISK) + AllocatedBlock diskBlock = blockManager.allocateBlock( + DEFAULT_BLOCK_SIZE, replicationConfig, OzoneConsts.OZONE, + new ExcludeList(), StorageType.DISK); + assertNotNull(diskBlock); + + // SSD allocation should fail: MockNodeManager nodes only have DISK + // volumes, so the PipelineStorageTypeFilter removes all pipelines, + // pipeline creation adds another DISK-only pipeline which also gets + // filtered, resulting in IOException. + assertThrows(IOException.class, + () -> blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, + replicationConfig, OzoneConsts.OZONE, + new ExcludeList(), StorageType.SSD)); + } + private class DatanodeCommandHandler implements EventHandler { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineStorageTypeFilter.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineStorageTypeFilter.java new file mode 100644 index 000000000000..eca565c0d752 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineStorageTypeFilter.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.StorageTypeProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.node.DatanodeInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; +import org.junit.jupiter.api.Test; + +/** + * Tests for PipelineStorageTypeFilter. + */ +class TestPipelineStorageTypeFilter { + + @Test + void testNullStorageTypeReturnsAllPipelines() { + NodeManager nodeManager = mock(NodeManager.class); + List pipelines = Arrays.asList( + MockPipeline.createRatisPipeline(), + MockPipeline.createRatisPipeline()); + List result = PipelineStorageTypeFilter.filter( + pipelines, nodeManager, null); + assertEquals(2, result.size()); + } + + @Test + void testFilterRetainsPipelinesWithMatchingNodes() { + NodeManager nodeManager = mock(NodeManager.class); + + // Create nodes with SSD storage + DatanodeDetails ssdNode1 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails ssdNode2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails ssdNode3 = MockDatanodeDetails.randomDatanodeDetails(); + + // Create nodes with DISK storage only + DatanodeDetails diskNode1 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails diskNode2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails diskNode3 = MockDatanodeDetails.randomDatanodeDetails(); + + List allNodes = Arrays.asList( + ssdNode1, ssdNode2, ssdNode3, diskNode1, diskNode2, diskNode3); + when(nodeManager.getNodes(NodeStatus.inServiceHealthy())) + .thenReturn(allNodes); + + // Configure SSD nodes to report SSD storage + for (DatanodeDetails ssdNode : Arrays.asList(ssdNode1, ssdNode2, + ssdNode3)) { + DatanodeInfo ssdInfo = mockDatanodeInfo(ssdNode, + StorageTypeProto.SSD); + when(nodeManager.getDatanodeInfo(ssdNode)).thenReturn(ssdInfo); + } + + // Configure DISK nodes to report DISK storage + for (DatanodeDetails diskNode : Arrays.asList(diskNode1, diskNode2, + diskNode3)) { + DatanodeInfo diskInfo = mockDatanodeInfo(diskNode, + StorageTypeProto.DISK); + when(nodeManager.getDatanodeInfo(diskNode)).thenReturn(diskInfo); + } + + // Pipeline with all SSD nodes + Pipeline ssdPipeline = createPipelineWithNodes( + Arrays.asList(ssdNode1, ssdNode2, ssdNode3)); + + // Pipeline with all DISK nodes + 
Pipeline diskPipeline = createPipelineWithNodes( + Arrays.asList(diskNode1, diskNode2, diskNode3)); + + // Mixed pipeline + Pipeline mixedPipeline = createPipelineWithNodes( + Arrays.asList(ssdNode1, diskNode1, ssdNode2)); + + List pipelines = new ArrayList<>( + Arrays.asList(ssdPipeline, diskPipeline, mixedPipeline)); + + // Filter for SSD — only the all-SSD pipeline should remain + List ssdResult = PipelineStorageTypeFilter.filter( + pipelines, nodeManager, StorageType.SSD); + assertEquals(1, ssdResult.size()); + assertEquals(ssdPipeline.getId(), ssdResult.get(0).getId()); + + // Filter for DISK — only the all-DISK pipeline should remain + List diskResult = PipelineStorageTypeFilter.filter( + pipelines, nodeManager, StorageType.DISK); + assertEquals(1, diskResult.size()); + assertEquals(diskPipeline.getId(), diskResult.get(0).getId()); + } + + @Test + void testFilterReturnsEmptyWhenNoMatch() { + NodeManager nodeManager = mock(NodeManager.class); + + DatanodeDetails diskNode1 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails diskNode2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails diskNode3 = MockDatanodeDetails.randomDatanodeDetails(); + + when(nodeManager.getNodes(NodeStatus.inServiceHealthy())) + .thenReturn(Arrays.asList(diskNode1, diskNode2, diskNode3)); + + for (DatanodeDetails dn : Arrays.asList(diskNode1, diskNode2, + diskNode3)) { + DatanodeInfo info = mockDatanodeInfo(dn, StorageTypeProto.DISK); + when(nodeManager.getDatanodeInfo(dn)).thenReturn(info); + } + + Pipeline pipeline = createPipelineWithNodes( + Arrays.asList(diskNode1, diskNode2, diskNode3)); + List pipelines = new ArrayList<>( + Collections.singletonList(pipeline)); + + // Filter for SSD — no pipeline should match + List result = PipelineStorageTypeFilter.filter( + pipelines, nodeManager, StorageType.SSD); + assertTrue(result.isEmpty()); + } + + @Test + void testGetNodesWithStorageType() { + NodeManager nodeManager = mock(NodeManager.class); + + DatanodeDetails ssdNode = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails diskNode = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails bothNode = MockDatanodeDetails.randomDatanodeDetails(); + + when(nodeManager.getNodes(NodeStatus.inServiceHealthy())) + .thenReturn(Arrays.asList(ssdNode, diskNode, bothNode)); + + // Create mock DatanodeInfo objects first, then stub nodeManager + DatanodeInfo ssdInfo = mockDatanodeInfo(ssdNode, StorageTypeProto.SSD); + DatanodeInfo diskInfo = mockDatanodeInfo(diskNode, StorageTypeProto.DISK); + DatanodeInfo bothInfo = mock(DatanodeInfo.class); + when(bothInfo.getStorageReports()).thenReturn(Arrays.asList( + createStorageReport(StorageTypeProto.SSD), + createStorageReport(StorageTypeProto.DISK))); + + when(nodeManager.getDatanodeInfo(ssdNode)).thenReturn(ssdInfo); + when(nodeManager.getDatanodeInfo(diskNode)).thenReturn(diskInfo); + when(nodeManager.getDatanodeInfo(bothNode)).thenReturn(bothInfo); + + Set ssdNodes = PipelineStorageTypeFilter + .getNodesWithStorageType(nodeManager, StorageType.SSD); + assertEquals(2, ssdNodes.size()); + assertTrue(ssdNodes.contains(ssdNode.getUuid())); + assertTrue(ssdNodes.contains(bothNode.getUuid())); + + Set diskNodes = PipelineStorageTypeFilter + .getNodesWithStorageType(nodeManager, StorageType.DISK); + assertEquals(2, diskNodes.size()); + assertTrue(diskNodes.contains(diskNode.getUuid())); + assertTrue(diskNodes.contains(bothNode.getUuid())); + } + + @Test + void testNodeWithNullDatanodeInfoIsSkipped() { + NodeManager nodeManager = 
mock(NodeManager.class); + + DatanodeDetails node = MockDatanodeDetails.randomDatanodeDetails(); + when(nodeManager.getNodes(NodeStatus.inServiceHealthy())) + .thenReturn(Collections.singletonList(node)); + when(nodeManager.getDatanodeInfo(node)).thenReturn(null); + + Set result = PipelineStorageTypeFilter + .getNodesWithStorageType(nodeManager, StorageType.SSD); + assertTrue(result.isEmpty()); + } + + private static DatanodeInfo mockDatanodeInfo(DatanodeDetails dn, + StorageTypeProto storageType) { + DatanodeInfo info = mock(DatanodeInfo.class); + when(info.getStorageReports()).thenReturn( + Collections.singletonList(createStorageReport(storageType))); + return info; + } + + private static StorageReportProto createStorageReport( + StorageTypeProto storageType) { + return StorageReportProto.newBuilder() + .setStorageUuid("uuid-" + UUID.randomUUID()) + .setStorageLocation("/data") + .setCapacity(100L * 1024 * 1024 * 1024) + .setScmUsed(10L * 1024 * 1024 * 1024) + .setRemaining(90L * 1024 * 1024 * 1024) + .setStorageType(storageType) + .build(); + } + + private static Pipeline createPipelineWithNodes( + List nodes) { + return Pipeline.newBuilder() + .setState(Pipeline.PipelineState.OPEN) + .setId(PipelineID.randomId()) + .setReplicationConfig( + org.apache.hadoop.hdds.client.RatisReplicationConfig.getInstance( + org.apache.hadoop.hdds.protocol.proto.HddsProtos + .ReplicationFactor.THREE)) + .setNodes(nodes) + .build(); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableECContainerProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableECContainerProvider.java index b3c34b44e4c2..dcbb633a3f5d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableECContainerProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableECContainerProvider.java @@ -35,6 +35,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; @@ -55,7 +56,10 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.StorageTypeProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.scm.PipelineChoosePolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -70,6 +74,9 @@ import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; import org.apache.hadoop.hdds.scm.net.NodeSchema; import org.apache.hadoop.hdds.scm.net.NodeSchemaManager; +import org.apache.hadoop.hdds.scm.node.DatanodeInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.pipeline.WritableECContainerProvider.WritableECContainerProviderConfig; import org.apache.hadoop.hdds.scm.pipeline.choose.algorithms.CapacityPipelineChoosePolicy; import org.apache.hadoop.hdds.scm.pipeline.choose.algorithms.HealthyPipelineChoosePolicy; @@ -78,6 +85,7 @@ import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; import 
org.apache.hadoop.hdds.utils.db.RocksDatabaseException; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -576,4 +584,235 @@ private long getMaxContainerSize() { ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, BYTES); } + @ParameterizedTest + @MethodSource("policies") + void testNullStorageTypeSkipsFilter(PipelineChoosePolicy policy) + throws IOException { + provider = createSubject(policy); + // Null storageType should behave identically to the 4-param method — + // both succeed without error + ContainerInfo container4 = provider.getContainer( + 1, repConfig, OWNER, new ExcludeList()); + assertNotNull(container4); + + ContainerInfo container5 = provider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), null); + assertNotNull(container5); + } + + @ParameterizedTest + @MethodSource("policies") + void testStorageTypeDiskMatchesDefaultNodes(PipelineChoosePolicy policy) + throws IOException { + provider = createSubject(policy); + // MockNodeManager reports DISK by default (null StorageTypeProto + // defaults to DISK), so filtering for DISK should succeed + ContainerInfo container = provider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), StorageType.DISK); + assertNotNull(container); + } + + /** + * Tests that StorageType filtering actually rejects pipelines whose nodes + * don't have the requested type. Uses fully mocked components to control + * exactly which storage types each node reports, and prevents new pipeline + * creation so the test can't pass via the fallback allocation path. + */ + @Test + void testStorageTypeFilterRejectsNonMatchingPipelines() + throws IOException { + // Create 5 EC nodes (3+2) + List nodes = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + nodes.add(MockDatanodeDetails.randomDatanodeDetails()); + } + Map replicaIndexes = new HashMap<>(); + for (int i = 0; i < nodes.size(); i++) { + replicaIndexes.put(nodes.get(i), i + 1); + } + + Pipeline pipeline = Pipeline.newBuilder() + .setState(Pipeline.PipelineState.OPEN) + .setId(PipelineID.randomId()) + .setReplicationConfig(repConfig) + .setNodes(nodes) + .setReplicaIndexes(replicaIndexes) + .build(); + + ContainerInfo containerInfo = createContainer(pipeline, repConfig, 1L); + + // Mock PipelineManager: one open pipeline, createPipeline always fails + PipelineManager pm = mock(PipelineManager.class); + when(pm.getPipelineCount(repConfig, Pipeline.PipelineState.OPEN)) + .thenReturn(100); // at max, skip initial allocateContainer + when(pm.getPipelines(repConfig, Pipeline.PipelineState.OPEN)) + .thenReturn(new ArrayList<>(Collections.singletonList(pipeline))); + when(pm.getContainersInPipeline(pipeline.getId())) + .thenReturn(new java.util.TreeSet<>( + Collections.singleton(containerInfo.containerID()))); + + // Mock ContainerManager + ContainerManager cm = mock(ContainerManager.class); + when(cm.getContainer(containerInfo.containerID())) + .thenReturn(containerInfo); + when(cm.getMatchingContainer(anyLong(), anyString(), any(Pipeline.class))) + .thenReturn(containerInfo); + + // Mock NodeManager: all nodes report only DISK + NodeManager nm = mock(NodeManager.class); + when(nm.getNodes(NodeStatus.inServiceHealthy())).thenReturn(nodes); + when(nm.getNodeCount(NodeStatus.inServiceHealthy())).thenReturn( + nodes.size()); + for (DatanodeDetails dn : nodes) { + DatanodeInfo info = mock(DatanodeInfo.class); + 
when(info.getStorageReports()).thenReturn( + Collections.singletonList(createDiskStorageReport())); + when(nm.getDatanodeInfo(dn)).thenReturn(info); + } + + WritableECContainerProvider ecProvider = new WritableECContainerProvider( + providerConf, getMaxContainerSize(), nm, pm, cm, + new RandomPipelineChoosePolicy()); + + // null storageType: filter is no-op, should return the container + ContainerInfo result = ecProvider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), null); + assertNotNull(result); + assertEquals(containerInfo.containerID(), result.containerID()); + + // DISK storageType: all nodes have DISK, pipeline should pass filter + result = ecProvider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), StorageType.DISK); + assertNotNull(result); + assertEquals(containerInfo.containerID(), result.containerID()); + + // SSD storageType: no nodes have SSD, filter removes the pipeline. + // Since openPipelineCount >= maximumPipelines and nodeCount <= + // maximumPipelines, the fallback allocateContainer is also blocked. + assertThrows(IOException.class, + () -> ecProvider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), StorageType.SSD)); + } + + /** + * Tests that filtering works correctly with a mix of SSD and DISK + * pipelines — the SSD pipeline is returned when SSD is requested, and + * the DISK pipeline is returned when DISK is requested. + */ + @Test + void testStorageTypeFilterSelectsCorrectPipeline() + throws IOException { + // Create two sets of nodes: SSD nodes and DISK nodes + List ssdNodes = new ArrayList<>(); + List diskNodes = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + ssdNodes.add(MockDatanodeDetails.randomDatanodeDetails()); + diskNodes.add(MockDatanodeDetails.randomDatanodeDetails()); + } + + Map ssdIndexes = new HashMap<>(); + Map diskIndexes = new HashMap<>(); + for (int i = 0; i < 5; i++) { + ssdIndexes.put(ssdNodes.get(i), i + 1); + diskIndexes.put(diskNodes.get(i), i + 1); + } + + Pipeline ssdPipeline = Pipeline.newBuilder() + .setState(Pipeline.PipelineState.OPEN) + .setId(PipelineID.randomId()) + .setReplicationConfig(repConfig) + .setNodes(ssdNodes) + .setReplicaIndexes(ssdIndexes) + .build(); + + Pipeline diskPipeline = Pipeline.newBuilder() + .setState(Pipeline.PipelineState.OPEN) + .setId(PipelineID.randomId()) + .setReplicationConfig(repConfig) + .setNodes(diskNodes) + .setReplicaIndexes(diskIndexes) + .build(); + + ContainerInfo ssdContainer = createContainer(ssdPipeline, repConfig, 1L); + ContainerInfo diskContainer = createContainer(diskPipeline, repConfig, 2L); + + // Mock PipelineManager with both pipelines + PipelineManager pm = mock(PipelineManager.class); + when(pm.getPipelineCount(repConfig, Pipeline.PipelineState.OPEN)) + .thenReturn(100); + when(pm.getPipelines(repConfig, Pipeline.PipelineState.OPEN)) + .thenReturn(new ArrayList<>( + java.util.Arrays.asList(ssdPipeline, diskPipeline))); + when(pm.getContainersInPipeline(ssdPipeline.getId())) + .thenReturn(new java.util.TreeSet<>( + Collections.singleton(ssdContainer.containerID()))); + when(pm.getContainersInPipeline(diskPipeline.getId())) + .thenReturn(new java.util.TreeSet<>( + Collections.singleton(diskContainer.containerID()))); + + // Mock ContainerManager + ContainerManager cm = mock(ContainerManager.class); + when(cm.getContainer(ssdContainer.containerID())) + .thenReturn(ssdContainer); + when(cm.getContainer(diskContainer.containerID())) + .thenReturn(diskContainer); + + // Mock NodeManager + List allNodes = new ArrayList<>(); + allNodes.addAll(ssdNodes); + 
allNodes.addAll(diskNodes); + NodeManager nm = mock(NodeManager.class); + when(nm.getNodes(NodeStatus.inServiceHealthy())).thenReturn(allNodes); + when(nm.getNodeCount(NodeStatus.inServiceHealthy())) + .thenReturn(allNodes.size()); + for (DatanodeDetails dn : ssdNodes) { + DatanodeInfo info = mock(DatanodeInfo.class); + when(info.getStorageReports()).thenReturn( + Collections.singletonList(createSsdStorageReport())); + when(nm.getDatanodeInfo(dn)).thenReturn(info); + } + for (DatanodeDetails dn : diskNodes) { + DatanodeInfo info = mock(DatanodeInfo.class); + when(info.getStorageReports()).thenReturn( + Collections.singletonList(createDiskStorageReport())); + when(nm.getDatanodeInfo(dn)).thenReturn(info); + } + + WritableECContainerProvider ecProvider = new WritableECContainerProvider( + providerConf, getMaxContainerSize(), nm, pm, cm, + new RandomPipelineChoosePolicy()); + + // Request SSD: should get the SSD pipeline's container + ContainerInfo result = ecProvider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), StorageType.SSD); + assertEquals(ssdPipeline.getId(), result.getPipelineID()); + + // Request DISK: should get the DISK pipeline's container + result = ecProvider.getContainer( + 1, repConfig, OWNER, new ExcludeList(), StorageType.DISK); + assertEquals(diskPipeline.getId(), result.getPipelineID()); + } + + private static StorageReportProto createDiskStorageReport() { + return StorageReportProto.newBuilder() + .setStorageUuid("uuid-" + java.util.UUID.randomUUID()) + .setStorageLocation("/data") + .setCapacity(100L * 1024 * 1024 * 1024) + .setScmUsed(10L * 1024 * 1024 * 1024) + .setRemaining(90L * 1024 * 1024 * 1024) + .setStorageType(StorageTypeProto.DISK) + .build(); + } + + private static StorageReportProto createSsdStorageReport() { + return StorageReportProto.newBuilder() + .setStorageUuid("uuid-" + java.util.UUID.randomUUID()) + .setStorageLocation("/ssd") + .setCapacity(100L * 1024 * 1024 * 1024) + .setScmUsed(10L * 1024 * 1024 * 1024) + .setRemaining(90L * 1024 * 1024 * 1024) + .setStorageType(StorageTypeProto.SSD) + .build(); + } + } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableRatisContainerProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableRatisContainerProvider.java index a1ba81d0a70a..5a13bfb825b3 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableRatisContainerProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestWritableRatisContainerProvider.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState.OPEN; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -31,17 +32,26 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.UUID; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.StorageTypeProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.scm.PipelineChoosePolicy; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.DatanodeInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.pipeline.choose.algorithms.RandomPipelineChoosePolicy; import org.junit.jupiter.api.RepeatedTest; import org.junit.jupiter.api.Test; @@ -161,6 +171,12 @@ private WritableRatisContainerProvider createSubject() { pipelineManager, containerManager, policy); } + private WritableRatisContainerProvider createSubject( + NodeManager nodeManager) { + return new WritableRatisContainerProvider( + pipelineManager, containerManager, policy, nodeManager); + } + private void verifyPipelineCreated() throws IOException { verify(pipelineManager, times(2)) .getPipelines(REPLICATION_CONFIG, OPEN, emptySet(), emptySet()); @@ -175,4 +191,84 @@ private void verifyPipelineNotCreated() throws IOException { .createPipeline(REPLICATION_CONFIG); } + @Test + void returnsContainerWithNullStorageType() throws Exception { + Pipeline pipeline = MockPipeline.createPipeline(3); + ContainerInfo existingContainer = pipelineHasContainer(pipeline); + existingPipelines(pipeline); + + ContainerInfo container = createSubject().getContainer( + CONTAINER_SIZE, REPLICATION_CONFIG, OWNER, NO_EXCLUSION, null); + assertSame(existingContainer, container); + } + + @Test + void returnsContainerWithMatchingStorageType() throws Exception { + Pipeline pipeline = MockPipeline.createPipeline(3); + ContainerInfo existingContainer = pipelineHasContainer(pipeline); + existingPipelines(pipeline); + + // Set up NodeManager to report SSD for all pipeline nodes + NodeManager nodeManager = mock(NodeManager.class); + List nodes = pipeline.getNodes(); + when(nodeManager.getNodes(NodeStatus.inServiceHealthy())) + .thenReturn(nodes); + for (DatanodeDetails dn : nodes) { + DatanodeInfo info = mock(DatanodeInfo.class); + when(info.getStorageReports()).thenReturn( + Collections.singletonList(StorageReportProto.newBuilder() + .setStorageUuid("uuid-" + UUID.randomUUID()) + .setStorageLocation("/data") + .setCapacity(100L * 1024 * 1024 * 1024) + .setScmUsed(10L * 1024 * 1024 * 1024) + .setRemaining(90L * 1024 * 1024 * 1024) + .setStorageType(StorageTypeProto.SSD) + .build())); + when(nodeManager.getDatanodeInfo(dn)).thenReturn(info); + } + + ContainerInfo container = createSubject(nodeManager).getContainer( + CONTAINER_SIZE, REPLICATION_CONFIG, OWNER, NO_EXCLUSION, + StorageType.SSD); + assertSame(existingContainer, container); + } + + @Test + void filtersOutPipelinesWithNonMatchingStorageType() throws Exception { + Pipeline pipeline = MockPipeline.createPipeline(3); + pipelineHasContainer(pipeline); + existingPipelines(pipeline); + + // Set up NodeManager to report DISK for all nodes + NodeManager nodeManager = mock(NodeManager.class); + List nodes = pipeline.getNodes(); + when(nodeManager.getNodes(NodeStatus.inServiceHealthy())) + .thenReturn(nodes); + for (DatanodeDetails dn : nodes) { + DatanodeInfo info = mock(DatanodeInfo.class); + when(info.getStorageReports()).thenReturn( + 
Collections.singletonList(StorageReportProto.newBuilder() + .setStorageUuid("uuid-" + UUID.randomUUID()) + .setStorageLocation("/data") + .setCapacity(100L * 1024 * 1024 * 1024) + .setScmUsed(10L * 1024 * 1024 * 1024) + .setRemaining(90L * 1024 * 1024 * 1024) + .setStorageType(StorageTypeProto.DISK) + .build())); + when(nodeManager.getDatanodeInfo(dn)).thenReturn(info); + } + + // A new pipeline will also be created with DISK nodes, but the filter + // on existing pipelines should remove them. + // Pipeline creation will also fail since we're in a mock environment. + when(pipelineManager.createPipeline(REPLICATION_CONFIG)) + .thenThrow(new SCMException( + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE)); + + assertThrows(IOException.class, + () -> createSubject(nodeManager).getContainer( + CONTAINER_SIZE, REPLICATION_CONFIG, OWNER, NO_EXCLUSION, + StorageType.SSD)); + } + } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java index da13da4cd9b7..6b71f7595beb 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java @@ -44,13 +44,13 @@ import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.StorageTypeProto; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockRequestProto; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto; import org.apache.hadoop.hdds.scm.HddsTestUtils; -import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.scm.block.BlockManager; import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl; diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index c56c079c3e4f..9d08c11cb6a4 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -76,7 +76,6 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineMetrics.java index f0ebb60078c1..e572819812d9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineMetrics.java +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineMetrics.java @@ -29,6 +29,7 @@ import java.util.Optional; import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; @@ -93,7 +94,7 @@ public void testNumBlocksAllocated() throws IOException, TimeoutException { cluster.getStorageContainerManager().getScmBlockManager() .allocateBlock(5, RatisReplicationConfig.getInstance(ReplicationFactor.ONE), - "Test", new ExcludeList()); + "Test", new ExcludeList(), StorageType.DEFAULT); MetricsRecordBuilder metrics = getMetrics(SCMPipelineMetrics.class.getSimpleName()); Pipeline pipeline = block.getPipeline(); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java index aea60509e5ea..32aeb473afda 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java @@ -32,8 +32,8 @@ import org.apache.hadoop.hdds.client.ContainerBlockID; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; -import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.scm.AddSCMRequest; import org.apache.hadoop.hdds.scm.ScmInfo; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index 45842547934c..90cf32cd517f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -39,10 +39,10 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.client.ContainerBlockID; import org.apache.hadoop.hdds.client.RatisReplicationConfig; -import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.OzoneStoragePolicy; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.scm.container.ContainerInfo; From b79d03427facd57ac630dffc3bf05ebbf9b42494 Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Fri, 27 Feb 2026 17:49:59 +1100 Subject: [PATCH 6/7] Storage tiering. 
Add creation of the typed pipelines --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 5 + .../pipeline/BackgroundPipelineCreator.java | 69 +++++++++++- .../hdds/scm/pipeline/PipelineManager.java | 6 + .../scm/pipeline/PipelineManagerImpl.java | 30 +++++ ...tBackgroundPipelineCreatorStorageType.java | 103 ++++++++++++++++++ .../scm/pipeline/TestPipelineManagerImpl.java | 29 +++++ 6 files changed, 236 insertions(+), 6 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestBackgroundPipelineCreatorStorageType.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 23400b1a06b4..49c767725b03 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -445,6 +445,11 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_PIPELINE_SCRUB_INTERVAL_DEFAULT = "150s"; + public static final String OZONE_SCM_PIPELINE_CREATION_STORAGE_TYPE_AWARE = + "ozone.scm.pipeline.creation.storage-type-aware.enabled"; + public static final boolean + OZONE_SCM_PIPELINE_CREATION_STORAGE_TYPE_AWARE_DEFAULT = false; + // Allow SCM to auto create factor ONE ratis pipeline. public static final String OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE = "ozone.scm.pipeline.creation.auto.factor.one"; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java index 0aefbedbd43b..4420ec5a1fc7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java @@ -25,11 +25,14 @@ import static org.apache.hadoop.hdds.scm.ha.SCMService.Event.PRE_CHECK_COMPLETED; import static org.apache.hadoop.hdds.scm.ha.SCMService.Event.UNHEALTHY_TO_HEALTHY_NODE_HANDLER_TRIGGERED; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.IOException; import java.time.Clock; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; @@ -40,6 +43,7 @@ import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.scm.ScmConfigKeys; @@ -88,6 +92,7 @@ public class BackgroundPipelineCreator implements SCMService { private final AtomicBoolean running = new AtomicBoolean(false); private final long intervalInMillis; private final Clock clock; + private final boolean storageTypeAwareCreation; BackgroundPipelineCreator(PipelineManager pipelineManager, ConfigurationSource conf, SCMContext scmContext, Clock clock) { @@ -110,6 +115,10 @@ public class BackgroundPipelineCreator implements SCMService { ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL_DEFAULT, 
        TimeUnit.MILLISECONDS);
 
+    this.storageTypeAwareCreation = conf.getBoolean(
+        ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_STORAGE_TYPE_AWARE,
+        ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_STORAGE_TYPE_AWARE_DEFAULT);
+
     threadName = scmContext.threadNamePrefix() + THREAD_NAME;
   }
@@ -203,7 +212,8 @@ private boolean skipCreation(ReplicationConfig replicationConfig,
     return true;
   }
 
-  private void createPipelines() throws RuntimeException {
+  @VisibleForTesting
+  void createPipelines() throws RuntimeException {
     // TODO: #CLUTIL Different replication factor may need to be supported
     HddsProtos.ReplicationType type = HddsProtos.ReplicationType.valueOf(
         conf.get(OzoneConfigKeys.OZONE_REPLICATION_TYPE,
@@ -212,8 +222,7 @@ private void createPipelines() throws RuntimeException {
         ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE,
         ScmConfigKeys.OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE_DEFAULT);
 
-    List<ReplicationConfig> list =
-        new ArrayList<>();
+    List<ReplicationConfig> replicationConfigs = new ArrayList<>();
     for (HddsProtos.ReplicationFactor factor : HddsProtos.ReplicationFactor
         .values()) {
       if (factor == ReplicationFactor.ZERO) {
@@ -233,10 +242,20 @@ private void createPipelines() throws RuntimeException {
         // Skip this iteration for creating pipeline
         continue;
       }
-      list.add(replicationConfig);
+      replicationConfigs.add(replicationConfig);
+    }
+
+    if (storageTypeAwareCreation) {
+      createTypedPipelines(replicationConfigs);
+    } else {
+      createUntypedPipelines(replicationConfigs);
     }
 
-    LoopingIterator it = new LoopingIterator(list);
+    LOG.debug("BackgroundPipelineCreator createPipelines finished.");
+  }
+
+  private void createUntypedPipelines(List<ReplicationConfig> configs) {
+    LoopingIterator it = new LoopingIterator(configs);
     while (it.hasNext()) {
       ReplicationConfig replicationConfig =
           (ReplicationConfig) it.next();
@@ -251,8 +270,46 @@ private void createPipelines() throws RuntimeException {
         it.remove();
       }
     }
+  }
 
-    LOG.debug("BackgroundPipelineCreator createPipelines finished.");
+  private void createTypedPipelines(List<ReplicationConfig> configs) {
+    // Build (ReplicationConfig, StorageType) pairs: for each config,
+    // one null entry (untyped) plus one per concrete StorageType.
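+    // For example, a single RATIS/THREE config expands to four entries:
+    // (THREE, null), (THREE, SSD), (THREE, DISK), (THREE, ARCHIVE).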
+    StorageType[] storageTypes = {
+        StorageType.SSD, StorageType.DISK, StorageType.ARCHIVE
+    };
+    List<Map.Entry<ReplicationConfig, StorageType>> pairs = new ArrayList<>();
+    for (ReplicationConfig config : configs) {
+      pairs.add(new AbstractMap.SimpleEntry<>(config, null));
+      for (StorageType st : storageTypes) {
+        pairs.add(new AbstractMap.SimpleEntry<>(config, st));
+      }
+    }
+
+    LoopingIterator it = new LoopingIterator(pairs);
+    while (it.hasNext()) {
+      @SuppressWarnings("unchecked")
+      Map.Entry<ReplicationConfig, StorageType> entry =
+          (Map.Entry<ReplicationConfig, StorageType>) it.next();
+
+      try {
+        Pipeline pipeline;
+        if (entry.getValue() == null) {
+          pipeline = pipelineManager.createPipeline(entry.getKey());
+        } else {
+          pipeline = pipelineManager.createPipeline(
+              entry.getKey(), entry.getValue());
+        }
+        LOG.info("Created new pipeline {} with StorageType {}",
+            pipeline, entry.getValue());
+      } catch (IOException ioe) {
+        it.remove();
+      } catch (Throwable t) {
+        LOG.error("Error while creating pipelines for StorageType " +
+            entry.getValue(), t);
+        it.remove();
+      }
+    }
   }
 
   @Override
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
index 6a448d6c88df..52c55471669f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
@@ -25,6 +25,7 @@ import java.util.Set;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.StorageType;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.utils.db.CodecException;
@@ -39,6 +40,11 @@ public interface PipelineManager extends Closeable, PipelineManagerMXBean {
   Pipeline createPipeline(ReplicationConfig replicationConfig)
       throws IOException;
 
+  default Pipeline createPipeline(ReplicationConfig replicationConfig,
+      StorageType storageType) throws IOException {
+    return createPipeline(replicationConfig);
+  }
+
   Pipeline createPipeline(ReplicationConfig replicationConfig,
       List<DatanodeDetails> excludedNodes,
       List<DatanodeDetails> favoredNodes)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
index 9c529e22e7e1..f473335c572e 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
@@ -30,6 +30,7 @@ import java.util.Map;
 import java.util.NavigableSet;
 import java.util.Set;
+import java.util.UUID;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -41,6 +42,7 @@ import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.StorageType;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
@@ -56,6 +58,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
 import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
 import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationManager;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.hdds.utils.db.CodecException;
@@ -268,6 +271,33 @@ public Pipeline createPipeline(ReplicationConfig replicationConfig,
     }
   }
 
+  @Override
+  public Pipeline createPipeline(ReplicationConfig replicationConfig,
+      StorageType storageType) throws IOException {
+    if (storageType == null) {
+      return createPipeline(replicationConfig);
+    }
+    // Compute excluded nodes: all healthy nodes that do NOT have the
+    // requested StorageType.
+    List<DatanodeDetails> allHealthy =
+        nodeManager.getNodes(NodeStatus.inServiceHealthy());
+    Set<UUID> qualifiedNodeIds =
+        PipelineStorageTypeFilter.getNodesWithStorageType(
+            nodeManager, storageType);
+
+    if (qualifiedNodeIds.isEmpty()) {
+      throw new IOException("No healthy nodes with StorageType " +
+          storageType + " available for pipeline creation");
+    }
+
+    List<DatanodeDetails> excludedNodes = allHealthy.stream()
+        .filter(dn -> !qualifiedNodeIds.contains(dn.getUuid()))
+        .collect(Collectors.toList());
+
+    return createPipeline(replicationConfig, excludedNodes,
+        Collections.emptyList());
+  }
+
   private void checkIfPipelineCreationIsAllowed(
       ReplicationConfig replicationConfig) throws IOException {
     if (!isPipelineCreationAllowed() && !factorOne(replicationConfig)) {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestBackgroundPipelineCreatorStorageType.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestBackgroundPipelineCreatorStorageType.java
new file mode 100644
index 000000000000..8c3e150c0f93
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestBackgroundPipelineCreatorStorageType.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.time.Instant; +import java.time.ZoneOffset; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.StorageType; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.ha.SCMContext; +import org.apache.ozone.test.TestClock; +import org.junit.jupiter.api.Test; + +/** + * Tests for storage-type-aware pipeline creation in + * BackgroundPipelineCreator. + */ +public class TestBackgroundPipelineCreatorStorageType { + + @Test + public void testStorageTypeAwareDisabled() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean( + ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_STORAGE_TYPE_AWARE, + false); + + PipelineManager pipelineManager = mock(PipelineManager.class); + when(pipelineManager.createPipeline(any(ReplicationConfig.class))) + .thenThrow(new IOException("exhausted")); + + SCMContext scmContext = SCMContext.emptyContext(); + + TestClock clock = new TestClock(Instant.now(), ZoneOffset.UTC); + BackgroundPipelineCreator creator = + new BackgroundPipelineCreator(pipelineManager, conf, scmContext, + clock); + + creator.createPipelines(); + + // Untyped createPipeline(ReplicationConfig) should have been called. + verify(pipelineManager, atLeastOnce()) + .createPipeline(any(ReplicationConfig.class)); + // Typed createPipeline(ReplicationConfig, StorageType) should NOT + // have been called. + verify(pipelineManager, never()) + .createPipeline(any(ReplicationConfig.class), + any(StorageType.class)); + } + + @Test + public void testStorageTypeAwareEnabled() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean( + ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_STORAGE_TYPE_AWARE, + true); + + PipelineManager pipelineManager = mock(PipelineManager.class); + when(pipelineManager.createPipeline(any(ReplicationConfig.class))) + .thenThrow(new IOException("exhausted")); + when(pipelineManager.createPipeline(any(ReplicationConfig.class), + any(StorageType.class))) + .thenThrow(new IOException("exhausted")); + + SCMContext scmContext = SCMContext.emptyContext(); + + TestClock clock = new TestClock(Instant.now(), ZoneOffset.UTC); + BackgroundPipelineCreator creator = + new BackgroundPipelineCreator(pipelineManager, conf, scmContext, + clock); + + creator.createPipelines(); + + // When storage-type-aware is enabled, the typed method should be called + // for SSD, DISK, and ARCHIVE. 
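+    // Note that the untyped overload is also attempted (each config gets a
+    // null entry), but only the typed interaction is asserted here.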
+ verify(pipelineManager, atLeastOnce()) + .createPipeline(any(ReplicationConfig.class), + any(StorageType.class)); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java index e7fc6f14f9b6..2e8919ff833a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; @@ -920,6 +921,34 @@ public void testWaitForAllocatedPipeline() throws IOException { pipelineManager.close(); } + @Test + public void testCreatePipelineWithStorageType() throws Exception { + PipelineManagerImpl pipelineManager = createPipelineManager(true); + + // MockNodeManager creates storage reports with DISK type by default. + // DISK-typed pipeline should succeed. + Pipeline diskPipeline = pipelineManager.createPipeline( + RatisReplicationConfig.getInstance(ReplicationFactor.THREE), + StorageType.DISK); + assertNotNull(diskPipeline); + assertEquals(3, diskPipeline.getNodes().size()); + + // SSD-typed pipeline should fail since no nodes have SSD storage. + assertThrows(IOException.class, + () -> pipelineManager.createPipeline( + RatisReplicationConfig.getInstance(ReplicationFactor.THREE), + StorageType.SSD)); + + // null StorageType should fall through to untyped creation. + Pipeline untypedPipeline = pipelineManager.createPipeline( + RatisReplicationConfig.getInstance(ReplicationFactor.THREE), + (StorageType) null); + assertNotNull(untypedPipeline); + assertEquals(3, untypedPipeline.getNodes().size()); + + pipelineManager.close(); + } + public void testCreatePipelineForRead() throws IOException { PipelineManager pipelineManager = createPipelineManager(true); List dns = nodeManager From 413af292f53ada5e0d5b051eb698cf59a8bd03eb Mon Sep 17 00:00:00 2001 From: Aleksei Ieshin Date: Wed, 4 Mar 2026 14:51:55 +1100 Subject: [PATCH 7/7] Storage tiering. 
Create container on matching volume type --- .../scm/storage/ContainerProtocolCalls.java | 23 +- .../container/keyvalue/KeyValueContainer.java | 32 +++ .../container/keyvalue/KeyValueHandler.java | 11 +- .../keyvalue/TestKeyValueContainer.java | 196 ++++++++++++++++++ .../keyvalue/TestKeyValueHandler.java | 184 ++++++++++++++++ .../main/proto/DatanodeClientProtocol.proto | 12 ++ .../hadoop/ozone/om/TestKeyManagerImpl.java | 4 +- .../om/TestOmContainerLocationCache.java | 6 +- .../request/file/TestOMFileCreateRequest.java | 6 +- .../request/key/TestOMKeyCreateRequest.java | 12 +- .../om/request/key/TestOMKeyRequest.java | 4 +- 11 files changed, 478 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java index 15879fb47649..c853f453539a 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java @@ -556,7 +556,7 @@ public static void createRecoveringContainer(XceiverClientSpi client, */ public static void createContainer(XceiverClientSpi client, long containerID, String encodedToken) throws IOException { - createContainer(client, containerID, encodedToken, null, 0); + createContainer(client, containerID, encodedToken, null, 0, null); } /** @@ -571,6 +571,24 @@ public static void createContainer(XceiverClientSpi client, long containerID, String encodedToken, ContainerProtos.ContainerDataProto.State state, int replicaIndex) throws IOException { + createContainer(client, containerID, encodedToken, state, replicaIndex, + null); + } + + /** + * createContainer call that creates a container on the datanode. 
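+   * A {@code null} storageType leaves the new optional field unset, which
+   * keeps the request wire-compatible with datanodes that predate tiering.
+   *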
+ * @param client - client + * @param containerID - ID of container + * @param encodedToken - encodedToken if security is enabled + * @param state - state of the container + * @param replicaIndex - index position of the container replica + * @param storageType - storage type for volume selection on the datanode + */ + public static void createContainer(XceiverClientSpi client, + long containerID, String encodedToken, + ContainerProtos.ContainerDataProto.State state, int replicaIndex, + ContainerProtos.StorageTypeProto storageType) + throws IOException { ContainerProtos.CreateContainerRequestProto.Builder createRequest = ContainerProtos.CreateContainerRequestProto.newBuilder(); createRequest @@ -581,6 +599,9 @@ public static void createContainer(XceiverClientSpi client, if (replicaIndex > 0) { createRequest.setReplicaIndex(replicaIndex); } + if (storageType != null) { + createRequest.setStorageType(storageType); + } String id = client.getPipeline().getFirstNode().getUuidString(); ContainerCommandRequestProto.Builder request = diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index b8214882f12e..782da59a69f7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -53,6 +53,7 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileUtil; @@ -148,6 +149,23 @@ public void setCheckChunksFilePath(boolean bCheckChunksDirFilePath) { @Override public void create(VolumeSet volumeSet, VolumeChoosingPolicy volumeChoosingPolicy, String clusterId) throws StorageContainerException { + create(volumeSet, volumeChoosingPolicy, clusterId, null); + } + + /** + * Creates a container, filtering volumes by the requested StorageType + * before choosing a volume. If no volumes match the requested type, + * falls back to all available volumes. 
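+   * The fallback is deliberate: a datanode that lacks the requested media
+   * logs a warning and still accepts the container instead of failing the
+   * write.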
+   *
+   * @param volumeSet the set of available volumes
+   * @param volumeChoosingPolicy policy for choosing among candidate volumes
+   * @param clusterId the cluster ID
+   * @param storageType the requested storage type, or null for no filtering
+   */
+  public void create(VolumeSet volumeSet, VolumeChoosingPolicy
+      volumeChoosingPolicy, String clusterId,
+      org.apache.hadoop.hdds.protocol.StorageType storageType)
+      throws StorageContainerException {
     Objects.requireNonNull(volumeChoosingPolicy, "VolumeChoosingPolicy == null");
     Objects.requireNonNull(volumeSet, "volumeSet == null");
     Objects.requireNonNull(clusterId, "clusterId == null");
@@ -159,6 +177,20 @@ public void create(VolumeSet volumeSet, VolumeChoosingPolicy
     try {
       List<HddsVolume> volumes =
           StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList());
+      if (storageType != null) {
+        org.apache.hadoop.fs.StorageType fsStorageType =
+            org.apache.hadoop.fs.StorageType.valueOf(storageType.name());
+        List<HddsVolume> filtered = volumes.stream()
+            .filter(v -> v.getStorageType() == fsStorageType)
+            .collect(Collectors.toList());
+        if (!filtered.isEmpty()) {
+          volumes = filtered;
+        } else {
+          LOG.warn("No volumes found with storage type {}, falling back to" +
+              " all volumes for container {}", storageType,
+              containerData.getContainerID());
+        }
+      }
       while (true) {
         HddsVolume containerVolume;
         String hddsVolumeDir;
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index ef598f3c0cb2..3bea204cc2a3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -100,6 +100,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.StorageType;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
@@ -475,12 +476,20 @@ ContainerCommandResponseProto handleCreateContainer(
     KeyValueContainer newContainer = new KeyValueContainer(
         newContainerData, conf);
 
+    // Extract storageType for volume selection on heterogeneous nodes.
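+    // Requests from older clients never set this field, so
+    // requestedStorageType stays null and volume choice is unchanged.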
+ StorageType requestedStorageType = null; + if (request.getCreateContainer().hasStorageType()) { + requestedStorageType = StorageType.valueOf( + request.getCreateContainer().getStorageType().name()); + } + boolean created = false; Lock containerIdLock = containerCreationLocks.get(containerID); containerIdLock.lock(); try { if (containerSet.getContainer(containerID) == null) { - newContainer.create(volumeSet, volumeChoosingPolicy, clusterId); + newContainer.create(volumeSet, volumeChoosingPolicy, clusterId, + requestedStorageType); if (RECOVERING == newContainer.getContainerState()) { created = containerSet.addContainerByOverwriteMissingContainer(newContainer); } else { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java index 9e66aaeb067a..b0f6e282b5c6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java @@ -200,6 +200,202 @@ private void testCreateContainer() throws StorageContainerException { "DB does not exist"); } + @ContainerTestVersionInfo.ContainerTest + public void testCreateContainerWithStorageTypeFiltering( + ContainerTestVersionInfo versionInfo) throws Exception { + init(versionInfo); + + // Create two volumes: one SSD, one DISK + File ssdDir = new File(folder, "ssd"); + File diskDir = new File(folder, "disk"); + assertTrue(ssdDir.mkdirs()); + assertTrue(diskDir.mkdirs()); + + HddsVolume ssdVolume = new HddsVolume.Builder(ssdDir.toString()) + .conf(CONF) + .datanodeUuid(datanodeId.toString()) + .storageType(org.apache.hadoop.fs.StorageType.SSD) + .build(); + HddsVolume diskVolume = new HddsVolume.Builder(diskDir.toString()) + .conf(CONF) + .datanodeUuid(datanodeId.toString()) + .storageType(org.apache.hadoop.fs.StorageType.DISK) + .build(); + + StorageVolumeUtil.checkVolume(ssdVolume, scmId, scmId, CONF, null, null); + StorageVolumeUtil.checkVolume(diskVolume, scmId, scmId, CONF, null, null); + + List mixedVolumes = new ArrayList<>(); + mixedVolumes.add(ssdVolume); + mixedVolumes.add(diskVolume); + + VolumeSet mixedVolumeSet = mock(MutableVolumeSet.class); + when(mixedVolumeSet.getVolumesList()) + .thenAnswer(i -> mixedVolumes.stream() + .map(v -> (StorageVolume) v) + .collect(Collectors.toList())); + + // volumeChoosingPolicy returns the first volume from the filtered list + RoundRobinVolumeChoosingPolicy policy = + mock(RoundRobinVolumeChoosingPolicy.class); + when(policy.chooseVolume(anyList(), anyLong())).thenAnswer( + invocation -> { + List volumes = invocation.getArgument(0); + return volumes.get(0); + }); + + // Request SSD storage type - should only see ssdVolume + KeyValueContainerData ssdContainerData = new KeyValueContainerData(100L, + layout, + (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(), + datanodeId.toString()); + KeyValueContainer ssdContainer = + new KeyValueContainer(ssdContainerData, CONF); + ssdContainer.create(mixedVolumeSet, policy, scmId, + org.apache.hadoop.hdds.protocol.StorageType.SSD); + + assertEquals(ssdVolume, ssdContainerData.getVolume()); + + // Request DISK storage type - should only see diskVolume + KeyValueContainerData diskContainerData = new KeyValueContainerData(101L, + layout, + (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(), + 
datanodeId.toString()); + KeyValueContainer diskContainer = + new KeyValueContainer(diskContainerData, CONF); + diskContainer.create(mixedVolumeSet, policy, scmId, + org.apache.hadoop.hdds.protocol.StorageType.DISK); + + assertEquals(diskVolume, diskContainerData.getVolume()); + } + + @ContainerTestVersionInfo.ContainerTest + public void testCreateContainerWithStorageTypeFallback( + ContainerTestVersionInfo versionInfo) throws Exception { + init(versionInfo); + + // Create only DISK volumes - no SSD available + File diskDir = new File(folder, "diskonly"); + assertTrue(diskDir.mkdirs()); + + HddsVolume diskVolume = new HddsVolume.Builder(diskDir.toString()) + .conf(CONF) + .datanodeUuid(datanodeId.toString()) + .storageType(org.apache.hadoop.fs.StorageType.DISK) + .build(); + StorageVolumeUtil.checkVolume(diskVolume, scmId, scmId, CONF, null, null); + + List diskOnlyVolumes = new ArrayList<>(); + diskOnlyVolumes.add(diskVolume); + + VolumeSet diskOnlyVolumeSet = mock(MutableVolumeSet.class); + when(diskOnlyVolumeSet.getVolumesList()) + .thenAnswer(i -> diskOnlyVolumes.stream() + .map(v -> (StorageVolume) v) + .collect(Collectors.toList())); + + RoundRobinVolumeChoosingPolicy policy = + mock(RoundRobinVolumeChoosingPolicy.class); + when(policy.chooseVolume(anyList(), anyLong())).thenAnswer( + invocation -> { + List volumes = invocation.getArgument(0); + return volumes.get(0); + }); + + // Request SSD but only DISK is available - should fall back to DISK + KeyValueContainerData fallbackData = new KeyValueContainerData(102L, + layout, + (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(), + datanodeId.toString()); + KeyValueContainer fallbackContainer = + new KeyValueContainer(fallbackData, CONF); + fallbackContainer.create(diskOnlyVolumeSet, policy, scmId, + org.apache.hadoop.hdds.protocol.StorageType.SSD); + + // Should succeed and use the DISK volume as fallback + assertEquals(diskVolume, fallbackData.getVolume()); + } + + @ContainerTestVersionInfo.ContainerTest + public void testCreateContainerWithNullStorageType( + ContainerTestVersionInfo versionInfo) throws Exception { + init(versionInfo); + // Null storageType should behave identically to the original create() + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId, null); + keyValueContainerData = keyValueContainer.getContainerData(); + + assertNotNull(keyValueContainerData.getMetadataPath()); + assertNotNull(keyValueContainerData.getChunksPath()); + assertTrue(keyValueContainer.getContainerFile().exists()); + assertTrue(keyValueContainer.getContainerDBFile().exists()); + } + + @ContainerTestVersionInfo.ContainerTest + public void testCreateContainerFilteringPassesOnlyMatchingVolumes( + ContainerTestVersionInfo versionInfo) throws Exception { + init(versionInfo); + + // Create 2 SSD + 1 DISK volumes + File ssd1Dir = new File(folder, "ssd1"); + File ssd2Dir = new File(folder, "ssd2"); + File diskDir = new File(folder, "disk2"); + assertTrue(ssd1Dir.mkdirs()); + assertTrue(ssd2Dir.mkdirs()); + assertTrue(diskDir.mkdirs()); + + HddsVolume ssd1 = new HddsVolume.Builder(ssd1Dir.toString()) + .conf(CONF).datanodeUuid(datanodeId.toString()) + .storageType(org.apache.hadoop.fs.StorageType.SSD).build(); + HddsVolume ssd2 = new HddsVolume.Builder(ssd2Dir.toString()) + .conf(CONF).datanodeUuid(datanodeId.toString()) + .storageType(org.apache.hadoop.fs.StorageType.SSD).build(); + HddsVolume disk = new HddsVolume.Builder(diskDir.toString()) + .conf(CONF).datanodeUuid(datanodeId.toString()) + 
+        .storageType(org.apache.hadoop.fs.StorageType.DISK).build();
+
+    StorageVolumeUtil.checkVolume(ssd1, scmId, scmId, CONF, null, null);
+    StorageVolumeUtil.checkVolume(ssd2, scmId, scmId, CONF, null, null);
+    StorageVolumeUtil.checkVolume(disk, scmId, scmId, CONF, null, null);
+
+    List<HddsVolume> allVolumes = new ArrayList<>();
+    allVolumes.add(ssd1);
+    allVolumes.add(ssd2);
+    allVolumes.add(disk);
+
+    VolumeSet vs = mock(MutableVolumeSet.class);
+    when(vs.getVolumesList())
+        .thenAnswer(i -> allVolumes.stream()
+            .map(v -> (StorageVolume) v)
+            .collect(Collectors.toList()));
+
+    // Capture which volumes the policy actually sees
+    List<List<HddsVolume>> capturedVolumeLists = new ArrayList<>();
+    RoundRobinVolumeChoosingPolicy policy =
+        mock(RoundRobinVolumeChoosingPolicy.class);
+    when(policy.chooseVolume(anyList(), anyLong())).thenAnswer(
+        invocation -> {
+          List<HddsVolume> volumes = invocation.getArgument(0);
+          capturedVolumeLists.add(new ArrayList<>(volumes));
+          return volumes.get(0);
+        });
+
+    KeyValueContainerData data = new KeyValueContainerData(200L,
+        layout,
+        (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(),
+        datanodeId.toString());
+    KeyValueContainer container = new KeyValueContainer(data, CONF);
+    container.create(vs, policy, scmId,
+        org.apache.hadoop.hdds.protocol.StorageType.SSD);
+
+    // Policy should have received only the two SSD volumes, not the DISK one
+    assertEquals(1, capturedVolumeLists.size());
+    List<HddsVolume> receivedVolumes = capturedVolumeLists.get(0);
+    assertEquals(2, receivedVolumes.size());
+    assertTrue(receivedVolumes.contains(ssd1));
+    assertTrue(receivedVolumes.contains(ssd2));
+    assertFalse(receivedVolumes.contains(disk));
+  }
+
   /**
    * Tests repair of containers affected by the bug reported in HDDS-6235.
    */
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
index 0385564ebf75..e64a166256fd 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
@@ -923,6 +923,190 @@ private static ContainerCommandRequestProto createContainerRequest(
         .build();
   }
 
+  @Test
+  public void testCreateContainerWithStorageType() throws Exception {
+    final String clusterId = UUID.randomUUID().toString();
+    final String datanodeId = UUID.randomUUID().toString();
+    conf = new OzoneConfiguration();
+
+    // Create SSD and DISK volumes
+    Path ssdPath = tempDir.resolve("ssd");
+    Path diskPath = tempDir.resolve("disk");
+    Files.createDirectories(ssdPath);
+    Files.createDirectories(diskPath);
+
+    final ContainerSet containerSet = spy(newContainerSet());
+    final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class);
+
+    HddsVolume ssdVolume = new HddsVolume.Builder(ssdPath.toString())
+        .conf(conf).clusterID(clusterId).datanodeUuid(datanodeId)
+        .storageType(org.apache.hadoop.fs.StorageType.SSD)
+        .volumeSet(volumeSet).build();
+    ssdVolume.format(clusterId);
+    ssdVolume.createWorkingDir(clusterId, null);
+    ssdVolume.createTmpDirs(clusterId);
+
+    HddsVolume diskVolume = new HddsVolume.Builder(diskPath.toString())
+        .conf(conf).clusterID(clusterId).datanodeUuid(datanodeId)
+        .storageType(org.apache.hadoop.fs.StorageType.DISK)
+        .volumeSet(volumeSet).build();
+    diskVolume.format(clusterId);
+    diskVolume.createWorkingDir(clusterId, null);
diskVolume.createTmpDirs(clusterId); + + when(volumeSet.getVolumesList()) + .thenReturn(java.util.Arrays.asList(ssdVolume, diskVolume)); + + final ContainerMetrics metrics = ContainerMetrics.create(conf); + try { + final AtomicInteger icrReceived = new AtomicInteger(0); + final KeyValueHandler kvHandler = new KeyValueHandler(conf, + datanodeId, containerSet, volumeSet, metrics, + c -> icrReceived.incrementAndGet(), + new ContainerChecksumTreeManager(conf)); + kvHandler.setClusterID(clusterId); + + // Create container with SSD storageType + ContainerCommandRequestProto ssdRequest = + ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.CreateContainer) + .setDatanodeUuid(datanodeId) + .setCreateContainer( + ContainerProtos.CreateContainerRequestProto.newBuilder() + .setContainerType(ContainerType.KeyValueContainer) + .setStorageType(ContainerProtos.StorageTypeProto.SSD) + .build()) + .setContainerID(1L) + .setPipelineID(UUID.randomUUID().toString()) + .build(); + + ContainerCommandResponseProto response = + kvHandler.handleCreateContainer(ssdRequest, null); + assertEquals(ContainerProtos.Result.SUCCESS, response.getResult()); + + // Verify container was placed on SSD volume + Container createdContainer = containerSet.getContainer(1L); + assertNotNull(createdContainer); + assertEquals(ssdVolume, + createdContainer.getContainerData().getVolume()); + + // Create container with DISK storageType + ContainerCommandRequestProto diskRequest = + ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.CreateContainer) + .setDatanodeUuid(datanodeId) + .setCreateContainer( + ContainerProtos.CreateContainerRequestProto.newBuilder() + .setContainerType(ContainerType.KeyValueContainer) + .setStorageType(ContainerProtos.StorageTypeProto.DISK) + .build()) + .setContainerID(2L) + .setPipelineID(UUID.randomUUID().toString()) + .build(); + + response = kvHandler.handleCreateContainer(diskRequest, null); + assertEquals(ContainerProtos.Result.SUCCESS, response.getResult()); + + Container diskContainer = containerSet.getContainer(2L); + assertNotNull(diskContainer); + assertEquals(diskVolume, + diskContainer.getContainerData().getVolume()); + } finally { + ssdVolume.getVolumeInfoStats().unregister(); + ssdVolume.getVolumeIOStats().unregister(); + diskVolume.getVolumeInfoStats().unregister(); + diskVolume.getVolumeIOStats().unregister(); + ContainerMetrics.remove(); + } + } + + @Test + public void testStorageTypeProtoSerialization() { + // Verify storageType field round-trips in the proto correctly + ContainerProtos.CreateContainerRequestProto withSsd = + ContainerProtos.CreateContainerRequestProto.newBuilder() + .setContainerType(ContainerType.KeyValueContainer) + .setStorageType(ContainerProtos.StorageTypeProto.SSD) + .build(); + assertTrue(withSsd.hasStorageType()); + assertEquals(ContainerProtos.StorageTypeProto.SSD, + withSsd.getStorageType()); + + ContainerProtos.CreateContainerRequestProto withDisk = + ContainerProtos.CreateContainerRequestProto.newBuilder() + .setContainerType(ContainerType.KeyValueContainer) + .setStorageType(ContainerProtos.StorageTypeProto.DISK) + .build(); + assertEquals(ContainerProtos.StorageTypeProto.DISK, + withDisk.getStorageType()); + + // Without storageType set - backward compatibility + ContainerProtos.CreateContainerRequestProto noStorageType = + ContainerProtos.CreateContainerRequestProto.newBuilder() + .setContainerType(ContainerType.KeyValueContainer) + .build(); + assertFalse(noStorageType.hasStorageType()); + + // Verify 
round-trip through serialization + byte[] bytes = withSsd.toByteArray(); + try { + ContainerProtos.CreateContainerRequestProto deserialized = + ContainerProtos.CreateContainerRequestProto.parseFrom(bytes); + assertTrue(deserialized.hasStorageType()); + assertEquals(ContainerProtos.StorageTypeProto.SSD, + deserialized.getStorageType()); + } catch (Exception e) { + fail("Proto round-trip failed: " + e.getMessage()); + } + } + + @Test + public void testCreateContainerWithoutStorageType() throws Exception { + final String clusterId = UUID.randomUUID().toString(); + final String datanodeId = UUID.randomUUID().toString(); + conf = new OzoneConfiguration(); + + final ContainerSet containerSet = spy(newContainerSet()); + final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); + + HddsVolume hddsVolume = new HddsVolume.Builder(tempDir.toString()) + .conf(conf).clusterID(clusterId).datanodeUuid(datanodeId) + .volumeSet(volumeSet).build(); + hddsVolume.format(clusterId); + hddsVolume.createWorkingDir(clusterId, null); + hddsVolume.createTmpDirs(clusterId); + + when(volumeSet.getVolumesList()) + .thenReturn(Collections.singletonList(hddsVolume)); + + final ContainerMetrics metrics = ContainerMetrics.create(conf); + try { + final AtomicInteger icrReceived = new AtomicInteger(0); + final KeyValueHandler kvHandler = new KeyValueHandler(conf, + datanodeId, containerSet, volumeSet, metrics, + c -> icrReceived.incrementAndGet(), + new ContainerChecksumTreeManager(conf)); + kvHandler.setClusterID(clusterId); + + // Create container without storageType (original behavior) + ContainerCommandRequestProto request = createContainerRequest( + datanodeId, 1L); + + ContainerCommandResponseProto response = + kvHandler.handleCreateContainer(request, null); + assertEquals(ContainerProtos.Result.SUCCESS, response.getResult()); + + Container createdContainer = containerSet.getContainer(1L); + assertNotNull(createdContainer); + assertEquals(hddsVolume, + createdContainer.getContainerData().getVolume()); + } finally { + hddsVolume.getVolumeInfoStats().unregister(); + hddsVolume.getVolumeIOStats().unregister(); + ContainerMetrics.remove(); + } + } + private KeyValueHandler createKeyValueHandler(Path path) throws IOException { final ContainerSet containerSet = newContainerSet(); final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index 05c94624c990..6b504aa9b3dc 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -264,6 +264,17 @@ enum ContainerType { KeyValueContainer = 1; } +/** + * Storage type for volume selection on datanodes. + * Values mirror StorageTypeProto in hdds.proto. + */ +enum StorageTypeProto { + DISK = 1; + SSD = 2; + ARCHIVE = 3; + RAM_DISK = 4; +} + // Container Messages. 
message CreateContainerRequestProto { @@ -271,6 +282,7 @@ message CreateContainerRequestProto { optional ContainerType containerType = 3 [default = KeyValueContainer]; optional int32 replicaIndex = 4; optional ContainerDataProto.State state = 5; + optional StorageTypeProto storageType = 6; } message CreateContainerResponseProto { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java index dcdb3828ae6d..cbe13f76c6c9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java @@ -227,7 +227,9 @@ public static void setUp() throws Exception { any(ReplicationConfig.class), anyString(), any(ExcludeList.class), - anyString())).thenThrow( + anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class))) + .thenThrow( new SCMException("SafeModePrecheck failed for allocateBlock", ResultCodes.SAFE_MODE_EXCEPTION)); createVolume(VOLUME_NAME); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmContainerLocationCache.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmContainerLocationCache.java index 4e69848b307d..158742c446af 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmContainerLocationCache.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmContainerLocationCache.java @@ -719,7 +719,8 @@ private void mockScmAllocationOnDn1(long containerID, any(ReplicationConfig.class), anyString(), any(ExcludeList.class), - anyString())) + anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class))) .thenReturn(Collections.singletonList(block)); } @@ -735,7 +736,8 @@ private void mockScmAllocationEcPipeline(long containerID, long localId) any(ECReplicationConfig.class), anyString(), any(ExcludeList.class), - anyString())) + anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class))) .thenReturn(Collections.singletonList(block)); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java index 3004f511480c..e3bdab432d21 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java @@ -730,7 +730,8 @@ public void testZeroSizedFileShouldCallAllocateBlock() throws Exception { verify(scmBlockLocationProtocol, atLeastOnce()) .allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), - any(ExcludeList.class), anyString()); + any(ExcludeList.class), anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class)); // Verify key locations are present in the response assertTrue(modifiedOmRequest.hasCreateFileRequest()); @@ -802,7 +803,8 @@ public void testFileWithoutDataSizeShouldAllocateBlock() throws Exception { verify(scmBlockLocationProtocol, atLeastOnce()) .allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), - any(ExcludeList.class), anyString()); + any(ExcludeList.class), anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class)); // 
Verify key locations are present in the response assertTrue(modifiedOmRequest.hasCreateFileRequest()); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java index 1666f4cb38e6..32f661527754 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java @@ -1120,12 +1120,14 @@ public void testEmptyKeyKeyDoesNotCallScmAllocateBlock() throws Exception { verify(scmBlockLocationProtocol, never()) .allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), - any(ExcludeList.class), anyString()); + any(ExcludeList.class), anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class)); verify(scmBlockLocationProtocol, never()) .allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), - any(ExcludeList.class), anyString()); + any(ExcludeList.class), anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class)); assertTrue(modifiedOmRequest.hasCreateKeyRequest()); CreateKeyRequest responseCreateKeyRequest = @@ -1197,12 +1199,14 @@ public void testKeyWithoutDataSizeCallsScmAllocateBlock() throws Exception { verify(scmBlockLocationProtocol, never()) .allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), - any(ExcludeList.class), anyString()); + any(ExcludeList.class), anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class)); verify(scmBlockLocationProtocol, never()) .allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), - any(ExcludeList.class), anyString()); + any(ExcludeList.class), anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class)); assertTrue(modifiedOmRequest.hasCreateKeyRequest()); CreateKeyRequest responseCreateKeyRequest = diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index 90cf32cd517f..928a9d8b43b8 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -223,7 +223,9 @@ public void setup() throws Exception { when(scmBlockLocationProtocol.allocateBlock(anyLong(), anyInt(), any(ReplicationConfig.class), anyString(), any(ExcludeList.class), - anyString())).thenAnswer(invocation -> { + anyString(), + any(org.apache.hadoop.hdds.protocol.StorageType.class))) + .thenAnswer(invocation -> { int num = invocation.getArgument(1); List allocatedBlocks = new ArrayList<>(num); for (int i = 0; i < num; i++) {