From ee8c8ab97e96d6a504bd22055901cc4c5b301c18 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 4 Jun 2024 20:49:23 -0400 Subject: [PATCH 001/126] Add related connector classes for clp_s --- .gitignore | 3 +- pom.xml | 8 ++ presto-clp/pom.xml | 88 ++++++++++++++++++ .../com/yscope/presto/ClpColumnHandle.java | 21 +++++ .../java/com/yscope/presto/ClpConfig.java | 33 +++++++ .../java/com/yscope/presto/ClpConnector.java | 82 +++++++++++++++++ .../yscope/presto/ClpConnectorFactory.java | 64 +++++++++++++ .../com/yscope/presto/ClpHandleResolver.java | 55 ++++++++++++ .../java/com/yscope/presto/ClpMetadata.java | 90 +++++++++++++++++++ .../java/com/yscope/presto/ClpModule.java | 37 ++++++++ .../java/com/yscope/presto/ClpPlugin.java | 28 ++++++ .../com/yscope/presto/ClpRecordCursor.java | 87 ++++++++++++++++++ .../java/com/yscope/presto/ClpRecordSet.java | 36 ++++++++ .../yscope/presto/ClpRecordSetProvider.java | 39 ++++++++ .../main/java/com/yscope/presto/ClpSplit.java | 46 ++++++++++ .../com/yscope/presto/ClpSplitManager.java | 30 +++++++ .../com/yscope/presto/ClpTableHandle.java | 21 +++++ .../yscope/presto/ClpTableLayoutHandle.java | 21 +++++ .../yscope/presto/ClpTransactionHandle.java | 22 +++++ .../test/java/com/yscope/presto/AppTest.java | 18 ++++ 20 files changed, 828 insertions(+), 1 deletion(-) create mode 100644 presto-clp/pom.xml create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpConfig.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpConnector.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpModule.java create mode 100644 
presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpSplit.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java create mode 100644 presto-clp/src/test/java/com/yscope/presto/AppTest.java diff --git a/.gitignore b/.gitignore index a4512f9f794d9..ad1d52762a05f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ *.iml *.ipr *.iws -target +target/ +etc/ /var /*/var/ /presto-product-tests/**/var/ diff --git a/pom.xml b/pom.xml index 94c7baf21bc30..85a955ca461b2 100644 --- a/pom.xml +++ b/pom.xml @@ -212,6 +212,7 @@ presto-native-sidecar-plugin presto-base-arrow-flight presto-function-server + presto-clp @@ -761,6 +762,12 @@ ${project.version} + + com.yscope.presto + presto-clp + ${project.version} + + com.facebook.presto presto-expressions @@ -2588,6 +2595,7 @@ + org.alluxio:alluxio-shaded-client org.codehaus.plexus:plexus-utils com.google.guava:guava com.fasterxml.jackson.core:jackson-annotations diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml new file mode 100644 index 0000000000000..5011d203dd560 --- /dev/null +++ b/presto-clp/pom.xml @@ -0,0 +1,88 @@ + + + 4.0.0 + + + com.facebook.presto + presto-root + 0.288-SNAPSHOT + + + com.yscope.presto + presto-clp + Presto CLP connector + presto-plugin + + + UTF-8 + ${project.parent.basedir} + + + + + com.facebook.airlift + bootstrap + + + + com.facebook.airlift + json + + + + com.facebook.airlift + log + + + + 
com.facebook.airlift + configuration + + + + com.google.inject + guice + + + + com.google.guava + guava + + + + javax.inject + javax.inject + + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + + com.facebook.presto + presto-spi + provided + + + + com.facebook.presto + presto-common + provided + + + + io.airlift + units + provided + + + + io.airlift + slice + provided + + + diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java new file mode 100644 index 0000000000000..c462a7b95e9bf --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.presto.spi.ColumnHandle; + +public class ClpColumnHandle + implements ColumnHandle +{ +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java new file mode 100644 index 0000000000000..8ac069c782ad7 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.airlift.configuration.Config; + +public class ClpConfig +{ + private String clpArchiveDir; + + public String getClpArchiveDir() + { + return clpArchiveDir; + } + + @Config("archive-dir") + public ClpConfig setClpArchiveDir(String clpArchiveDir) + { + this.clpArchiveDir = clpArchiveDir; + return this; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java new file mode 100644 index 0000000000000..5336fec617866 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java @@ -0,0 +1,82 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.airlift.bootstrap.LifeCycleManager; +import com.facebook.airlift.log.Logger; +import com.facebook.presto.spi.connector.Connector; +import com.facebook.presto.spi.connector.ConnectorMetadata; +import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; +import com.facebook.presto.spi.connector.ConnectorSplitManager; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.facebook.presto.spi.transaction.IsolationLevel; + +import javax.inject.Inject; + +import static java.util.Objects.requireNonNull; + +public class ClpConnector + implements Connector +{ + private static final Logger log = Logger.get(ClpConnector.class); + + private final LifeCycleManager lifeCycleManager; + private final ClpMetadata metadata; + private final ClpSplitManager splitManager; + private final ClpRecordSetProvider recordSetProvider; + + @Inject + public ClpConnector(LifeCycleManager lifeCycleManager, ClpMetadata metadata, ClpSplitManager splitManager, ClpRecordSetProvider recordSetProvider) + { + this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); + this.metadata = requireNonNull(metadata, "metadata is null"); + this.splitManager = requireNonNull(splitManager, "splitManager is null"); + this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); + } + + @Override + public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly) + { + return ClpTransactionHandle.INSTANCE; + } + + @Override + public ConnectorMetadata getMetadata(ConnectorTransactionHandle transactionHandle) + { + return metadata; + } + + @Override + public ConnectorSplitManager getSplitManager() + { + return splitManager; + } + + @Override + public ConnectorRecordSetProvider getRecordSetProvider() + { + return recordSetProvider; + } + + @Override + public final void shutdown() + { + try { + lifeCycleManager.stop(); + } + catch (Exception 
e) { + log.error(e, "Error shutting down connector"); + } + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java new file mode 100644 index 0000000000000..ae4fd6d09b9b2 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java @@ -0,0 +1,64 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.airlift.bootstrap.Bootstrap; +import com.facebook.airlift.json.JsonModule; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.spi.ConnectorHandleResolver; +import com.facebook.presto.spi.NodeManager; +import com.facebook.presto.spi.connector.Connector; +import com.facebook.presto.spi.connector.ConnectorContext; +import com.facebook.presto.spi.connector.ConnectorFactory; +import com.google.inject.Injector; + +import java.util.Map; + +import static java.util.Objects.requireNonNull; + +public class ClpConnectorFactory + implements ConnectorFactory +{ + @Override + public String getName() + { + return "clp"; + } + + @Override + public ConnectorHandleResolver getHandleResolver() + { + return new ClpHandleResolver(); + } + + @Override + public Connector create(String catalogName, Map config, ConnectorContext context) + { + requireNonNull(catalogName, "catalogName is null"); + requireNonNull(config, "config is null"); + try { + Bootstrap app = new Bootstrap(new 
JsonModule(), new ClpModule(), binder -> { + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); + binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + }); + + Injector injector = app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize(); + + return injector.getInstance(ClpConnector.class); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java b/presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java new file mode 100644 index 0000000000000..c281ba3230e90 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java @@ -0,0 +1,55 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorHandleResolver; +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; + +public class ClpHandleResolver + implements ConnectorHandleResolver +{ + @Override + public Class getTableHandleClass() + { + return ClpTableHandle.class; + } + + @Override + public Class getTableLayoutHandleClass() + { + return ClpTableLayoutHandle.class; + } + + @Override + public Class getColumnHandleClass() + { + return ClpColumnHandle.class; + } + + @Override + public Class getSplitClass() + { + return ClpSplit.class; + } + + @Override + public Class getTransactionHandleClass() + { + return ClpTransactionHandle.class; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java new file mode 100644 index 0000000000000..b0813dd391ffc --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -0,0 +1,90 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ColumnMetadata; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.ConnectorTableLayout; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.SchemaTablePrefix; +import com.facebook.presto.spi.connector.ConnectorMetadata; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class ClpMetadata + implements ConnectorMetadata +{ + @Override + public List listSchemaNames(ConnectorSession session) + { + return ImmutableList.of("default"); + } + + @Override + public List listTables(ConnectorSession session, Optional schemaName) + { + return ImmutableList.of(new SchemaTableName("default", "example")); + } + + @Override + public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) + { + return new ClpTableHandle(); + } + + @Override + public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle handle) + { + return new ConnectorTableLayout(handle); + } + + @Override + public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) + { + return new ConnectorTableMetadata(new SchemaTableName("default", "example"), ImmutableList.of( + new ColumnMetadata("column1", VarcharType.VARCHAR), + new ColumnMetadata("column2", BigintType.BIGINT))); + } + + @Override + public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) + { + return ImmutableMap.of(new SchemaTableName("default", 
"example"), ImmutableList.of( + new ColumnMetadata("column1", VarcharType.VARCHAR), + new ColumnMetadata("column2", BigintType.BIGINT))); + } + + @Override + public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) + { + return ImmutableMap.of("column1", new ClpColumnHandle()); + } + + @Override + public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) + { + ClpColumnHandle clpColumnHandle = (ClpColumnHandle) columnHandle; + return new ColumnMetadata("column1", VarcharType.VARCHAR); + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java new file mode 100644 index 0000000000000..59ccd5925f7b9 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java @@ -0,0 +1,37 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.google.inject.Binder; +import com.google.inject.Module; +import com.google.inject.Scopes; + +import static com.facebook.airlift.configuration.ConfigBinder.configBinder; + +public class ClpModule + implements Module +{ + @Override + public void configure(Binder binder) + { + binder.bind(ClpConnector.class).in(Scopes.SINGLETON); + binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); + binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); + binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); + // TODO: bind ClpClient + configBinder(binder).bindConfig(ClpConfig.class); + } + + // TODO: type deserializer +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java new file mode 100644 index 0000000000000..97d86dc0c2cba --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.Plugin; +import com.facebook.presto.spi.connector.ConnectorFactory; +import com.google.common.collect.ImmutableList; + +public class ClpPlugin + implements Plugin +{ + @Override + public Iterable getConnectorFactories() + { + return ImmutableList.of(new ClpConnectorFactory()); + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java new file mode 100644 index 0000000000000..4c0715286db47 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.common.type.Type; +import com.facebook.presto.spi.RecordCursor; +import io.airlift.slice.Slice; + +public class ClpRecordCursor + implements RecordCursor +{ + @Override + public long getCompletedBytes() + { + return 0; + } + + @Override + public long getReadTimeNanos() + { + return 0; + } + + @Override + public Type getType(int field) + { + return null; + } + + @Override + public boolean advanceNextPosition() + { + return false; + } + + @Override + public boolean getBoolean(int field) + { + return false; + } + + @Override + public long getLong(int field) + { + return 0; + } + + @Override + public double getDouble(int field) + { + return 0; + } + + @Override + public Slice getSlice(int field) + { + return null; + } + + @Override + public Object getObject(int field) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isNull(int field) + { + return false; + } + + @Override + public void close() + { + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java new file mode 100644 index 0000000000000..03c2d6c51a415 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.common.type.Type; +import com.facebook.presto.spi.RecordCursor; +import com.facebook.presto.spi.RecordSet; + +import java.util.List; + +public class ClpRecordSet + implements RecordSet +{ + @Override + public List getColumnTypes() + { + return null; + } + + @Override + public RecordCursor cursor() + { + return null; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java new file mode 100644 index 0000000000000..0f4f705c871f6 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -0,0 +1,39 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.RecordSet; +import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; + +import javax.inject.Inject; + +import java.util.List; + +public class ClpRecordSetProvider + implements ConnectorRecordSetProvider +{ + @Inject + public ClpRecordSetProvider() + { + } + + public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List columns) + { + return null; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java new file mode 100644 index 0000000000000..23adf54e9f19c --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -0,0 +1,46 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.HostAddress; +import com.facebook.presto.spi.NodeProvider; +import com.facebook.presto.spi.schedule.NodeSelectionStrategy; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE; + +public class ClpSplit + implements ConnectorSplit +{ + @Override + public NodeSelectionStrategy getNodeSelectionStrategy() + { + return NO_PREFERENCE; + } + + @Override + public List getPreferredNodes(NodeProvider nodeProvider) + { + return ImmutableList.of(); + } + + @Override + public Object getInfo() + { + return this; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java new file mode 100644 index 0000000000000..d4b5c2af30936 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -0,0 +1,30 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorSplitSource; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.connector.ConnectorSplitManager; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; + +public class ClpSplitManager + implements ConnectorSplitManager +{ + @Override + public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout, SplitSchedulingContext splitSchedulingContext) + { + return null; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java new file mode 100644 index 0000000000000..d94db1bc0b754 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.ConnectorTableHandle; + +public class ClpTableHandle + implements ConnectorTableHandle +{ +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java new file mode 100644 index 0000000000000..d44c82aaa31f5 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.presto.spi.ConnectorTableLayoutHandle; + +public class ClpTableLayoutHandle + implements ConnectorTableLayoutHandle +{ +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java new file mode 100644 index 0000000000000..3b7b47e55bc06 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java @@ -0,0 +1,22 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; + +public enum ClpTransactionHandle + implements ConnectorTransactionHandle +{ + INSTANCE +} diff --git a/presto-clp/src/test/java/com/yscope/presto/AppTest.java b/presto-clp/src/test/java/com/yscope/presto/AppTest.java new file mode 100644 index 0000000000000..bd6b5092d4a8b --- /dev/null +++ b/presto-clp/src/test/java/com/yscope/presto/AppTest.java @@ -0,0 +1,18 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +public class AppTest +{ +} From f38f9628139b36ee9d4e22e8afc123d3ef24e4b1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 5 Jun 2024 11:01:43 -0400 Subject: [PATCH 002/126] add a constructor for ClpPlugin --- .../src/main/java/com/yscope/presto/ClpPlugin.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java index 97d86dc0c2cba..f9325e0698703 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java @@ -20,9 +20,16 @@ public class ClpPlugin implements Plugin { + private final ConnectorFactory connectorFactory; + + public ClpPlugin() + { + connectorFactory = new ClpConnectorFactory(); + } + @Override public Iterable getConnectorFactories() { - return ImmutableList.of(new ClpConnectorFactory()); + return ImmutableList.of(connectorFactory); } } From 8b71203e895661f2d0d36102daeb763f94958b55 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 5 Jun 2024 16:20:43 -0400 Subject: [PATCH 003/126] add ClpClient; add methods to get all tables --- .gitignore | 1 + .../java/com/yscope/presto/ClpClient.java | 53 +++++++++++++++++++ .../java/com/yscope/presto/ClpMetadata.java | 15 +++++- 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpClient.java diff --git a/.gitignore b/.gitignore index ad1d52762a05f..8b83d8075e311 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.iws target/ etc/ +data/ /var /*/var/ /presto-product-tests/**/var/ diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java new file mode 100644 index 0000000000000..df49f756b2e7d --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -0,0 +1,53 @@ +/* + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.google.common.collect.ImmutableSet; +import com.google.inject.Inject; + +import java.io.File; +import java.util.Set; + +public class ClpClient +{ + private final ClpConfig config; + + @Inject + public ClpClient(ClpConfig config) + { + this.config = config; + } + + public Set listTables() + { + File archiveDir = new File(config.getClpArchiveDir()); + if (!archiveDir.exists() || !archiveDir.isDirectory()) { + return ImmutableSet.of(); + } + + File[] files = archiveDir.listFiles(); + if (files == null) { + return ImmutableSet.of(); + } + + ImmutableSet.Builder tableNames = ImmutableSet.builder(); + for (File file : files) { + if (file.isDirectory()) { + tableNames.add(file.getName()); + } + } + + return tableNames.build(); + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index b0813dd391ffc..512d7c3c47aeb 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -27,6 +27,7 @@ import com.facebook.presto.spi.connector.ConnectorMetadata; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; import java.util.List; import java.util.Map; @@ -35,6 +36,14 @@ public class ClpMetadata implements ConnectorMetadata { + private final ClpClient clpClient; + + @Inject + public 
ClpMetadata(ClpClient clpClient) + { + this.clpClient = clpClient; + } + @Override public List listSchemaNames(ConnectorSession session) { @@ -44,7 +53,11 @@ public List listSchemaNames(ConnectorSession session) @Override public List listTables(ConnectorSession session, Optional schemaName) { - return ImmutableList.of(new SchemaTableName("default", "example")); + ImmutableList.Builder builder = ImmutableList.builder(); + for (String tableName : clpClient.listTables()) { + builder.add(new SchemaTableName("default", tableName)); + } + return builder.build(); } @Override From 3b49f9a751efa9bb747cd2d7490fbd1fafadb492 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Jun 2024 20:25:07 -0400 Subject: [PATCH 004/126] add schema tree implementation --- .../java/com/yscope/presto/ClpClient.java | 22 ++++ .../com/yscope/presto/ClpColumnHandle.java | 76 +++++++++++++ .../java/com/yscope/presto/ClpMetadata.java | 17 ++- .../com/yscope/presto/ClpTableHandle.java | 40 +++++++ .../com/yscope/presto/schema/SchemaNode.java | 102 ++++++++++++++++++ .../com/yscope/presto/schema/SchemaTree.java | 57 ++++++++++ 6 files changed, 311 insertions(+), 3 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index df49f756b2e7d..959a2708efb6a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -50,4 +50,26 @@ public Set listTables() return tableNames.build(); } + + public Set listColumns(String tableName) + { + File tableDir = new File(config.getClpArchiveDir(), tableName); + if (!tableDir.exists() || !tableDir.isDirectory()) { + return ImmutableSet.of(); + } + + File[] files = tableDir.listFiles(); + if (files == null) { + return ImmutableSet.of(); + 
} + + ImmutableSet.Builder columnNames = ImmutableSet.builder(); + for (File file : files) { + if (file.isFile()) { + columnNames.add(file.getName()); + } + } + + return columnNames.build(); + } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java index c462a7b95e9bf..39a7da1d91a5b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java @@ -13,9 +13,85 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ColumnMetadata; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +import static com.google.common.base.MoreObjects.toStringHelper; public class ClpColumnHandle implements ColumnHandle { + private final String columnName; + private final Type columnType; + private final boolean nullable; + + public ClpColumnHandle( + @JsonProperty("columnName") String columnName, + @JsonProperty("columnType") Type columnType, + @JsonProperty("nullable") boolean nullable) + { + this.columnName = columnName; + this.columnType = columnType; + this.nullable = nullable; + } + + @JsonProperty + public String getColumnName() + { + return columnName; + } + + @JsonProperty + public Type getColumnType() + { + return columnType; + } + + @JsonProperty + public boolean isNullable() + { + return nullable; + } + + public ColumnMetadata getColumnMetadata() + { + ColumnMetadata.Builder builder = ColumnMetadata.builder() + .setName(columnName) + .setType(columnType) + .setNullable(nullable); + return builder.build(); + } + + @Override + public int hashCode() + { + return Objects.hash(columnName); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ClpColumnHandle 
other = (ClpColumnHandle) obj; + return Objects.equals(this.columnName, other.columnName); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("columnName", columnName) + .add("columnType", columnType) + .add("nullable", nullable) + .toString(); + } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 512d7c3c47aeb..7972b66ebd802 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -63,7 +63,15 @@ public List listTables(ConnectorSession session, Optional> listTableColumns(ConnectorSess @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { - return ImmutableMap.of("column1", new ClpColumnHandle()); + ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle; + clpClient.listColumns(clpTableHandle.getTableName()); + + return ImmutableMap.of("column1", new ClpColumnHandle(ta)); } @Override public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) { ClpColumnHandle clpColumnHandle = (ClpColumnHandle) columnHandle; - return new ColumnMetadata("column1", VarcharType.VARCHAR); + return clpColumnHandle.getColumnMetadata(); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index d94db1bc0b754..84181ec239ef8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -14,8 +14,48 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorTableHandle; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; public class ClpTableHandle implements ConnectorTableHandle { + private final String tableName; + + 
@JsonCreator + public ClpTableHandle(@JsonProperty("tableName") String tableName) + { + this.tableName = tableName; + } + + @JsonProperty + public String getTableName() + { + return tableName; + } + + @Override + public int hashCode() + { + return Objects.hash(tableName); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ClpTableHandle other = (ClpTableHandle) obj; + return this.tableName.equals(other.tableName); + } + + @Override + public String toString() + { + return tableName; + } } diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java new file mode 100644 index 0000000000000..46d4efaa96a8c --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java @@ -0,0 +1,102 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto.schema; + +import java.util.ArrayList; +import java.util.Objects; + +public class SchemaNode +{ + private final int id; + private final int parentId; + private final ArrayList childrenIds; + private final String name; + private final NodeType type; + public SchemaNode(int id, int parentId, String name, NodeType type) + { + this.id = id; + this.parentId = parentId; + this.name = name; + this.type = type; + this.childrenIds = new ArrayList<>(); + } + + public String getName() + { + return name; + } + + public NodeType getType() + { + return type; + } + + public int getId() + { + return id; + } + + public int getParentId() + { + return parentId; + } + + public void addChild(int id) + { + childrenIds.add(id); + } + + public ArrayList getChildrenIds() + { + return childrenIds; + } + + public enum NodeType + { + Integer, Float, ClpString, VarString, Boolean, Object, UnstructuredArray, NullValue, DateString, StructuredArray + } + + public static class NodeTuple + { + private final int parentId; + private final String name; + private final NodeType type; + + public NodeTuple(int parentId, String name, NodeType type) + { + this.parentId = parentId; + this.name = name; + this.type = type; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + NodeTuple tuple = (NodeTuple) o; + return Objects.equals(type, tuple.type) && Objects.equals(parentId, tuple.parentId) && Objects.equals(name, tuple.name); + } + + @Override + public int hashCode() + { + return Objects.hash(type, parentId, name); + } + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java new file mode 100644 index 0000000000000..ccfb46de85b35 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java @@ -0,0 +1,57 @@ +/* + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto.schema; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +public class SchemaTree +{ + private final ArrayList schemaNodes; + private final Map nodeMap; + private final ArrayList primitiveTypeIds; + + public SchemaTree() + { + schemaNodes = new ArrayList<>(); + primitiveTypeIds = new ArrayList<>(); + nodeMap = new HashMap(); + } + + public int addNode(int parentId, String name, SchemaNode.NodeType type) + { + SchemaNode.NodeTuple tuple = new SchemaNode.NodeTuple(parentId, name, type); + if (nodeMap.containsKey(tuple)) { + return nodeMap.get(tuple); + } + + int id = schemaNodes.size(); + schemaNodes.add(new SchemaNode(id, parentId, name, type)); + nodeMap.put(tuple, id); + + if (parentId >= 0) { + schemaNodes.get(parentId).addChild(id); + } + if (type != SchemaNode.NodeType.Object) { + primitiveTypeIds.add(id); + } + return id; + } + + public getPrimitiveFields() + { + return primitiveTypeIds; + } +} From 57dab45548c6783c7d5381727f31baf81a9e1549 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Jun 2024 15:01:53 -0400 Subject: [PATCH 005/126] add basic metadata operations --- presto-clp/pom.xml | 5 + .../java/com/yscope/presto/ClpClient.java | 162 +++++++++++++++--- .../com/yscope/presto/ClpColumnHandle.java | 5 +- .../java/com/yscope/presto/ClpConfig.java | 13 ++ .../java/com/yscope/presto/ClpMetadata.java | 28 ++- .../com/yscope/presto/ClpTableHandle.java | 2 + 
.../com/yscope/presto/schema/SchemaNode.java | 58 ++++++- .../com/yscope/presto/schema/SchemaTree.java | 23 ++- 8 files changed, 254 insertions(+), 42 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 5011d203dd560..91a36885399e3 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -61,6 +61,11 @@ provided + + com.github.luben + zstd-jni + + com.facebook.presto presto-spi diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 959a2708efb6a..d7eb85f634ba7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -13,63 +13,179 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.IntegerType; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.VarcharType; +import com.github.luben.zstd.ZstdInputStream; import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; +import com.yscope.presto.schema.SchemaNode; +import com.yscope.presto.schema.SchemaTree; -import java.io.File; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Set; public class ClpClient { private final ClpConfig config; + private final Map> tableNameToColumnHandles; @Inject public ClpClient(ClpConfig config) { this.config = config; + this.tableNameToColumnHandles = new HashMap<>(); } public Set listTables() { - File archiveDir = new File(config.getClpArchiveDir()); - if (!archiveDir.exists() || 
!archiveDir.isDirectory()) { + Path archiveDir = Paths.get(config.getClpArchiveDir()); + if (!Files.exists(archiveDir) || !Files.isDirectory(archiveDir)) { return ImmutableSet.of(); } - File[] files = archiveDir.listFiles(); - if (files == null) { + try (DirectoryStream stream = Files.newDirectoryStream(archiveDir)) { + ImmutableSet.Builder tableNames = ImmutableSet.builder(); + for (Path path : stream) { + if (Files.isDirectory(path)) { + tableNames.add(path.getFileName().toString()); + } + } + return tableNames.build(); + } + catch (Exception e) { return ImmutableSet.of(); } + } - ImmutableSet.Builder tableNames = ImmutableSet.builder(); - for (File file : files) { - if (file.isDirectory()) { - tableNames.add(file.getName()); + public Set listColumns(String tableName) + { + if (tableNameToColumnHandles.containsKey(tableName)) { + return tableNameToColumnHandles.get(tableName); + } + + Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); + HashSet columnHandles = new HashSet<>(); + if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { + return ImmutableSet.of(); + } + + try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { + ImmutableSet.Builder columnNames = ImmutableSet.builder(); + for (Path path : stream) { + if (Files.isRegularFile(path)) { + continue; + } + + // For each directory, get schema_maps file under it + Path schemaMapsFile = path.resolve("schema_tree"); + if (!Files.exists(schemaMapsFile) || !Files.isRegularFile(schemaMapsFile)) { + continue; + } + + columnHandles.addAll(parseSchemaTreeFile(schemaMapsFile)); } } + catch (Exception e) { + tableNameToColumnHandles.put(tableName, ImmutableSet.of()); + return ImmutableSet.of(); + } - return tableNames.build(); + if (!config.isPolymorphicTypeEnabled()) { + tableNameToColumnHandles.put(tableName, columnHandles); + return columnHandles; + } + Set polymorphicColumnHandles = handlePolymorphicType(columnHandles); + tableNameToColumnHandles.put(tableName, 
polymorphicColumnHandles); + return polymorphicColumnHandles; } - public Set listColumns(String tableName) + private Set parseSchemaTreeFile(Path schemaMapsFile) { - File tableDir = new File(config.getClpArchiveDir(), tableName); - if (!tableDir.exists() || !tableDir.isDirectory()) { - return ImmutableSet.of(); - } + SchemaTree schemaTree = new SchemaTree(); + try (InputStream fileInputStream = Files.newInputStream(schemaMapsFile); + ZstdInputStream zstdInputStream = new ZstdInputStream(fileInputStream); + DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { + long numberOfNodes = dataInputStream.readLong(); + for (int i = 0; i < numberOfNodes; i++) { + int parentId = dataInputStream.readInt(); + long stringSize = dataInputStream.readLong(); + byte[] stringBytes = new byte[(int) stringSize]; + dataInputStream.readFully(stringBytes); + String name = new String(stringBytes, StandardCharsets.UTF_8); + SchemaNode.NodeType type = SchemaNode.NodeType.fromType(dataInputStream.readByte()); + schemaTree.addNode(parentId, name, type); + } - File[] files = tableDir.listFiles(); - if (files == null) { + ArrayList primitiveTypeFields = schemaTree.getPrimitiveFields(); + HashSet columnHandles = new HashSet<>(); + for (SchemaNode.NodeTuple nodeTuple : primitiveTypeFields) { + SchemaNode.NodeType nodeType = nodeTuple.getType(); + Type prestoType = null; + switch (nodeType) { + case Integer: + prestoType = IntegerType.INTEGER; + break; + case Float: + prestoType = DoubleType.DOUBLE; + break; + case ClpString: + case VarString: + case DateString: + prestoType = VarcharType.VARCHAR; + break; + case Boolean: + prestoType = BooleanType.BOOLEAN; + break; + default: + break; + } + columnHandles.add(new ClpColumnHandle(nodeTuple.getName(), prestoType, true)); + } + return columnHandles; + } + catch (IOException e) { return ImmutableSet.of(); } + } + + private Set handlePolymorphicType(Set columnHandles) + { + Map> columnNameToColumnHandles = new HashMap<>(); + Set 
polymorphicColumnHandles = new HashSet<>(); - ImmutableSet.Builder columnNames = ImmutableSet.builder(); - for (File file : files) { - if (file.isFile()) { - columnNames.add(file.getName()); + for (ClpColumnHandle columnHandle : columnHandles) { + columnNameToColumnHandles.computeIfAbsent(columnHandle.getColumnName(), k -> new ArrayList<>()) + .add(columnHandle); + } + for (Map.Entry> entry : columnNameToColumnHandles.entrySet()) { + List columnHandleList = entry.getValue(); + if (columnHandleList.size() == 1) { + polymorphicColumnHandles.add(columnHandleList.get(0)); + } + else { + for (ClpColumnHandle columnHandle : columnHandleList) { + polymorphicColumnHandles.add(new ClpColumnHandle( + columnHandle.getColumnName() + "_" + columnHandle.getColumnType().getDisplayName(), + columnHandle.getColumnType(), + columnHandle.isNullable())); + } } } - - return columnNames.build(); + return polymorphicColumnHandles; } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java index 39a7da1d91a5b..5ad39ece53ebf 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java @@ -69,7 +69,7 @@ public ColumnMetadata getColumnMetadata() @Override public int hashCode() { - return Objects.hash(columnName); + return Objects.hash(columnName, columnType); } @Override @@ -82,7 +82,8 @@ public boolean equals(Object obj) return false; } ClpColumnHandle other = (ClpColumnHandle) obj; - return Objects.equals(this.columnName, other.columnName); + return Objects.equals(this.columnName, other.columnName) && + Objects.equals(this.columnType, other.columnType); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 8ac069c782ad7..1745a7231f95a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ 
b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -18,6 +18,7 @@ public class ClpConfig { private String clpArchiveDir; + private boolean polymorphicTypeEnabled; public String getClpArchiveDir() { @@ -30,4 +31,16 @@ public ClpConfig setClpArchiveDir(String clpArchiveDir) this.clpArchiveDir = clpArchiveDir; return this; } + + public boolean isPolymorphicTypeEnabled() + { + return polymorphicTypeEnabled; + } + + @Config("polymorphic-type-enabled") + public ClpConfig setPolymorphicTypeEnabled(boolean polymorphicTypeEnabled) + { + this.polymorphicTypeEnabled = polymorphicTypeEnabled; + return this; + } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 7972b66ebd802..6eb2e5b70170b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -83,17 +83,18 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa @Override public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { - return new ConnectorTableMetadata(new SchemaTableName("default", "example"), ImmutableList.of( - new ColumnMetadata("column1", VarcharType.VARCHAR), - new ColumnMetadata("column2", BigintType.BIGINT))); + return new ConnectorTableMetadata(new SchemaTableName("default", "example"), + ImmutableList.of(new ColumnMetadata("column1", VarcharType.VARCHAR), + new ColumnMetadata("column2", BigintType.BIGINT))); } @Override - public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) + public Map> listTableColumns(ConnectorSession session, + SchemaTablePrefix prefix) { - return ImmutableMap.of(new SchemaTableName("default", "example"), ImmutableList.of( - new ColumnMetadata("column1", VarcharType.VARCHAR), - new ColumnMetadata("column2", BigintType.BIGINT))); + return ImmutableMap.of(new SchemaTableName("default", "example"), + 
ImmutableList.of(new ColumnMetadata("column1", VarcharType.VARCHAR), + new ColumnMetadata("column2", BigintType.BIGINT))); } @Override @@ -102,11 +103,20 @@ public Map getColumnHandles(ConnectorSession session, Conn ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle; clpClient.listColumns(clpTableHandle.getTableName()); - return ImmutableMap.of("column1", new ClpColumnHandle(ta)); + for (ClpColumnHandle columnHandle : clpClient.listColumns(clpTableHandle.getTableName())) { + System.out.println(columnHandle.getColumnName()); + } + + return ImmutableMap.of("column1", + new ClpColumnHandle("column1", VarcharType.VARCHAR, true), + "column2", + new ClpColumnHandle("column2", BigintType.BIGINT, false)); } @Override - public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) + public ColumnMetadata getColumnMetadata(ConnectorSession session, + ConnectorTableHandle tableHandle, + ColumnHandle columnHandle) { ClpColumnHandle clpColumnHandle = (ClpColumnHandle) columnHandle; return clpColumnHandle.getColumnMetadata(); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index 84181ec239ef8..f0a179d9d49b0 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -17,6 +17,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Objects; + public class ClpTableHandle implements ConnectorTableHandle { diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java index 46d4efaa96a8c..18c010d784196 100644 --- a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java +++ b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java @@ -23,6 +23,7 @@ public 
class SchemaNode private final ArrayList childrenIds; private final String name; private final NodeType type; + public SchemaNode(int id, int parentId, String name, NodeType type) { this.id = id; @@ -64,7 +65,38 @@ public ArrayList getChildrenIds() public enum NodeType { - Integer, Float, ClpString, VarString, Boolean, Object, UnstructuredArray, NullValue, DateString, StructuredArray + Integer((byte) 0), + Float((byte) 1), + ClpString((byte) 2), + VarString((byte) 3), + Boolean((byte) 4), + Object((byte) 5), + UnstructuredArray((byte) 6), + NullValue((byte) 7), + DateString((byte) 8), + StructuredArray((byte) 9); + + private final byte type; + + NodeType(byte type) + { + this.type = type; + } + + public static NodeType fromType(byte type) + { + for (NodeType status : NodeType.values()) { + if (status.getType() == type) { + return status; + } + } + throw new IllegalArgumentException("Invalid type code: " + type); + } + + public byte getType() + { + return type; + } } public static class NodeTuple @@ -80,6 +112,26 @@ public NodeTuple(int parentId, String name, NodeType type) this.type = type; } + public NodeTuple(String name, NodeType type) + { + this(-1, name, type); + } + + public NodeType getType() + { + return type; + } + + public int getParentId() + { + return parentId; + } + + public String getName() + { + return name; + } + @Override public boolean equals(Object o) { @@ -90,7 +142,9 @@ public boolean equals(Object o) return false; } NodeTuple tuple = (NodeTuple) o; - return Objects.equals(type, tuple.type) && Objects.equals(parentId, tuple.parentId) && Objects.equals(name, tuple.name); + return Objects.equals(type, tuple.type) && + Objects.equals(parentId, tuple.parentId) && + Objects.equals(name, tuple.name); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java index ccfb46de85b35..9ca1a25c1a8d1 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java +++ b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java @@ -21,13 +21,18 @@ public class SchemaTree { private final ArrayList schemaNodes; private final Map nodeMap; - private final ArrayList primitiveTypeIds; + private final ArrayList primitiveTypeFields; public SchemaTree() { schemaNodes = new ArrayList<>(); - primitiveTypeIds = new ArrayList<>(); - nodeMap = new HashMap(); + primitiveTypeFields = new ArrayList<>(); + nodeMap = new HashMap<>(); + } + + public ArrayList getPrimitiveFields() + { + return primitiveTypeFields; } public int addNode(int parentId, String name, SchemaNode.NodeType type) @@ -44,14 +49,20 @@ public int addNode(int parentId, String name, SchemaNode.NodeType type) if (parentId >= 0) { schemaNodes.get(parentId).addChild(id); } + if (type != SchemaNode.NodeType.Object) { - primitiveTypeIds.add(id); + primitiveTypeFields.add(new SchemaNode.NodeTuple(getKeyName(id, name), type)); } return id; } - public getPrimitiveFields() + private String getKeyName(int id, String key) { - return primitiveTypeIds; + SchemaNode node = schemaNodes.get(id); + if (node.getParentId() < 0) { + return key; + } + + return getKeyName(node.getParentId(), node.getName() + "." 
+ key); } } From 9c94bdeb41ad7c2f99d9b3f58ef6c59b4553ad3a Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Jun 2024 15:06:37 -0400 Subject: [PATCH 006/126] style fix --- .../src/main/java/com/yscope/presto/ClpConnector.java | 5 ++++- .../main/java/com/yscope/presto/ClpRecordSetProvider.java | 6 +++++- .../src/main/java/com/yscope/presto/ClpSplitManager.java | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java index 5336fec617866..7b8111c791714 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java @@ -37,7 +37,10 @@ public class ClpConnector private final ClpRecordSetProvider recordSetProvider; @Inject - public ClpConnector(LifeCycleManager lifeCycleManager, ClpMetadata metadata, ClpSplitManager splitManager, ClpRecordSetProvider recordSetProvider) + public ClpConnector(LifeCycleManager lifeCycleManager, + ClpMetadata metadata, + ClpSplitManager splitManager, + ClpRecordSetProvider recordSetProvider) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index 0f4f705c871f6..1815a1a9c92ba 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -32,7 +32,11 @@ public ClpRecordSetProvider() { } - public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List columns) + @Override + public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorSplit split, + List columns) { 
return null; } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index d4b5c2af30936..d4849580f2f6d 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -23,7 +23,10 @@ public class ClpSplitManager implements ConnectorSplitManager { @Override - public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout, SplitSchedulingContext splitSchedulingContext) + public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorTableLayoutHandle layout, + SplitSchedulingContext splitSchedulingContext) { return null; } From 97bc690b4b9965ba3e496307f9036bd3fddde4ae Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Jun 2024 15:59:53 -0400 Subject: [PATCH 007/126] a relatively complete metadata implementation --- .../java/com/yscope/presto/ClpClient.java | 9 +++-- .../java/com/yscope/presto/ClpMetadata.java | 33 +++++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index d7eb85f634ba7..597b8a8504a58 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -43,6 +43,7 @@ public class ClpClient { private final ClpConfig config; private final Map> tableNameToColumnHandles; + private Set tableNames; @Inject public ClpClient(ClpConfig config) @@ -53,6 +54,9 @@ public ClpClient(ClpConfig config) public Set listTables() { + if (tableNames != null) { + return tableNames; + } Path archiveDir = Paths.get(config.getClpArchiveDir()); if (!Files.exists(archiveDir) || !Files.isDirectory(archiveDir)) { return ImmutableSet.of(); @@ -65,11 +69,12 
@@ public Set listTables() tableNames.add(path.getFileName().toString()); } } - return tableNames.build(); + this.tableNames = tableNames.build(); } catch (Exception e) { - return ImmutableSet.of(); + this.tableNames = ImmutableSet.of(); } + return this.tableNames; } public Set listColumns(String tableName) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 6eb2e5b70170b..f1de4f8728aa4 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -13,8 +13,6 @@ */ package com.yscope.presto; -import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorSession; @@ -83,34 +81,33 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa @Override public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { - return new ConnectorTableMetadata(new SchemaTableName("default", "example"), - ImmutableList.of(new ColumnMetadata("column1", VarcharType.VARCHAR), - new ColumnMetadata("column2", BigintType.BIGINT))); + ClpTableHandle clpTableHandle = (ClpTableHandle) table; + String tableName = clpTableHandle.getTableName(); + List columns = clpClient.listColumns(tableName).stream() + .map(ClpColumnHandle::getColumnMetadata) + .collect(ImmutableList.toImmutableList()); + + return new ConnectorTableMetadata(new SchemaTableName("default", tableName), columns); } @Override public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { - return ImmutableMap.of(new SchemaTableName("default", "example"), - ImmutableList.of(new ColumnMetadata("column1", VarcharType.VARCHAR), - new ColumnMetadata("column2", BigintType.BIGINT))); + return 
clpClient.listTables().stream() + .collect(ImmutableMap.toImmutableMap( + tableName -> new SchemaTableName("default", tableName), + tableName -> getTableMetadata(session, new ClpTableHandle(tableName)).getColumns())); } @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle; - clpClient.listColumns(clpTableHandle.getTableName()); - - for (ClpColumnHandle columnHandle : clpClient.listColumns(clpTableHandle.getTableName())) { - System.out.println(columnHandle.getColumnName()); - } - - return ImmutableMap.of("column1", - new ClpColumnHandle("column1", VarcharType.VARCHAR, true), - "column2", - new ClpColumnHandle("column2", BigintType.BIGINT, false)); + return clpClient.listColumns(clpTableHandle.getTableName()).stream() + .collect(ImmutableMap.toImmutableMap( + ClpColumnHandle::getColumnName, + column -> column)); } @Override From cdb8c27fc1c7a086c8545c734a376dbe23f7d2e7 Mon Sep 17 00:00:00 2001 From: wraymo Date: Sun, 9 Jun 2024 00:45:14 -0400 Subject: [PATCH 008/126] fix bugs and add test cases --- presto-clp/pom.xml | 12 +++ .../java/com/yscope/presto/ClpClient.java | 15 +++- .../com/yscope/presto/schema/SchemaTree.java | 2 +- .../test/java/com/yscope/presto/AppTest.java | 18 ----- .../com/yscope/presto/TestClpMetadata.java | 75 ++++++++++++++++++ .../array.dict | Bin 0 -> 8 bytes .../log.dict | Bin 0 -> 53 bytes .../schema_ids | Bin 0 -> 48 bytes .../schema_tree | Bin 0 -> 65 bytes .../table_metadata | Bin 0 -> 51 bytes .../tables | Bin 0 -> 86 bytes .../timestamp.dict | Bin 0 -> 17 bytes .../var.dict | Bin 0 -> 43 bytes .../array.dict | Bin 0 -> 8 bytes .../log.dict | Bin 0 -> 39 bytes .../schema_ids | Bin 0 -> 66 bytes .../schema_tree | Bin 0 -> 72 bytes .../table_metadata | Bin 0 -> 55 bytes .../tables | Bin 0 -> 90 bytes .../timestamp.dict | Bin 0 -> 17 bytes .../var.dict | Bin 0 -> 54 bytes presto-clp/src/test/resources/logs/test_1 | 4 + 
presto-clp/src/test/resources/logs/test_2 | 4 + 23 files changed, 108 insertions(+), 22 deletions(-) delete mode 100644 presto-clp/src/test/java/com/yscope/presto/AppTest.java create mode 100644 presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/array.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/log.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_ids create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_tree create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/table_metadata create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/tables create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/timestamp.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/var.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/array.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/log.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_ids create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_tree create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/table_metadata create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/tables create mode 100644 
presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/timestamp.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/var.dict create mode 100644 presto-clp/src/test/resources/logs/test_1 create mode 100644 presto-clp/src/test/resources/logs/test_2 diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 91a36885399e3..61c8d380826b3 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -89,5 +89,17 @@ slice provided + + + org.testng + testng + test + + + + com.facebook.presto + presto-main + test + diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 597b8a8504a58..1af8a7271d62a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -27,6 +27,9 @@ import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; @@ -57,6 +60,7 @@ public Set listTables() if (tableNames != null) { return tableNames; } + System.out.println("Working Directory = " + System.getProperty("user.dir")); Path archiveDir = Paths.get(config.getClpArchiveDir()); if (!Files.exists(archiveDir) || !Files.isDirectory(archiveDir)) { return ImmutableSet.of(); @@ -125,10 +129,15 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) try (InputStream fileInputStream = Files.newInputStream(schemaMapsFile); ZstdInputStream zstdInputStream = new ZstdInputStream(fileInputStream); DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { - long numberOfNodes = dataInputStream.readLong(); + byte[] longBytes = new byte[8]; + byte[] intBytes = new byte[4]; + dataInputStream.readFully(longBytes); + long 
numberOfNodes = ByteBuffer.wrap(longBytes).order(ByteOrder.nativeOrder()).getLong(); for (int i = 0; i < numberOfNodes; i++) { - int parentId = dataInputStream.readInt(); - long stringSize = dataInputStream.readLong(); + dataInputStream.readFully(intBytes); + int parentId = ByteBuffer.wrap(intBytes).order(ByteOrder.nativeOrder()).getInt(); + dataInputStream.readFully(longBytes); + long stringSize = ByteBuffer.wrap(longBytes).order(ByteOrder.nativeOrder()).getLong(); byte[] stringBytes = new byte[(int) stringSize]; dataInputStream.readFully(stringBytes); String name = new String(stringBytes, StandardCharsets.UTF_8); diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java index 9ca1a25c1a8d1..9791804949124 100644 --- a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java +++ b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java @@ -51,7 +51,7 @@ public int addNode(int parentId, String name, SchemaNode.NodeType type) } if (type != SchemaNode.NodeType.Object) { - primitiveTypeFields.add(new SchemaNode.NodeTuple(getKeyName(id, name), type)); + primitiveTypeFields.add(new SchemaNode.NodeTuple(getKeyName(parentId, name), type)); } return id; } diff --git a/presto-clp/src/test/java/com/yscope/presto/AppTest.java b/presto-clp/src/test/java/com/yscope/presto/AppTest.java deleted file mode 100644 index bd6b5092d4a8b..0000000000000 --- a/presto-clp/src/test/java/com/yscope/presto/AppTest.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto; - -public class AppTest -{ -} diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java new file mode 100644 index 0000000000000..401f766e076c6 --- /dev/null +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.IntegerType; +import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.ColumnMetadata; +import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.HashSet; +import java.util.Optional; + +import static com.facebook.presto.testing.TestingConnectorSession.SESSION; +import static org.testng.Assert.assertEquals; + +@Test(singleThreaded = true) +public class TestClpMetadata +{ + private ClpMetadata metadata; + + @BeforeMethod + public void setUp() + { + ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive"); + metadata = new ClpMetadata(new ClpClient(config)); + } + + @Test + public void testListSchemaNames() + { + assertEquals(metadata.listSchemaNames(SESSION), ImmutableList.of("default")); + } + + @Test + public void testListTables() + { + assertEquals(metadata.listTables(SESSION, Optional.empty()), + ImmutableList.of(new SchemaTableName("default", "test_1_table"), + new SchemaTableName("default", "test_2_table"))); + } + + @Test + public void testGetTable1Metadata() + { + ClpTableHandle clpTableHandle = + (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName("default", "test_1_table")); + ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); + HashSet columnMetadata = new HashSet<>(); + columnMetadata.add(ColumnMetadata.builder().setName("a").setType(IntegerType.INTEGER).setNullable(true).build()); + columnMetadata.add(ColumnMetadata.builder().setName("b").setType(DoubleType.DOUBLE).setNullable(true).build()); + 
columnMetadata.add(ColumnMetadata.builder().setName("c.d").setType(BooleanType.BOOLEAN).setNullable(true).build()); + columnMetadata.add(ColumnMetadata.builder().setName("c.e").setType(VarcharType.VARCHAR).setNullable(true).build()); + columnMetadata.add(ColumnMetadata.builder().setName("a").setType(VarcharType.VARCHAR).setNullable(true).build()); + columnMetadata.add(ColumnMetadata.builder().setName("b").setType(VarcharType.VARCHAR).setNullable(true).build()); + columnMetadata.add(ColumnMetadata.builder().setName("c").setType(DoubleType.DOUBLE).setNullable(true).build()); + assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); + } +} diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/array.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/array.dict new file mode 100644 index 0000000000000000000000000000000000000000..1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e GIT binary patch literal 8 KcmZQzfB*mh2mk>9 literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/log.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/log.dict new file mode 100644 index 0000000000000000000000000000000000000000..15e08683264d65e964e3d20d5f1eb7a987fd25c3 GIT binary patch literal 53 zcmZQ#fB=oH`hOWBR2dl_a5H$M=H%onl;;=ar0_Dh=jW#+Ri;9?ObiKs7!w!)P@E2r literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_ids b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_ids new file mode 100644 index 0000000000000000000000000000000000000000..c5ce9e254d27088e079d0e93d55e960b8568d413 GIT binary patch literal 48 zcmdPcs{fZE!j_R?1~UT-BLf2i6Du2#WM}38k{k@?oNqqaYH2GleyTmW>vRrtE&$Sx B3ef-n literal 0 HcmV?d00001 diff --git 
a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_tree b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_tree new file mode 100644 index 0000000000000000000000000000000000000000..d96afdbabf05e4c644b4fc8cdc2d5618a23d5cdb GIT binary patch literal 65 zcmdPcs{fZE;wU4-4^D>vK)}Gt00fK-i3~}M$*jyNEU8QkiOfmKjNA+l1R9bPcn%mE Susx8N%)}wV!@Nn3K@0$g^$}(O literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/table_metadata b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/table_metadata new file mode 100644 index 0000000000000000000000000000000000000000..13e31e9b13b64ba2221a02b87f12f53c83449e23 GIT binary patch literal 51 zcmdPcs{fZEB9xI~1v3K^gB1g#I)elQldS*)1aLAaoc1q1cpyQb<;GKPHXfBWMlk>d CN(%G< literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/tables b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/tables new file mode 100644 index 0000000000000000000000000000000000000000..cba9aaa49ba4c925555c5eaca6b03d41790583d9 GIT binary patch literal 86 zcmdPcs{fZE;wu9~10w?yg98JH10w?jurlaMGblYc^xy$w0;2|4SuO*E1_J{lg98%- Ol4v6XP$vX1Lumj-gAmvN literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/timestamp.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/timestamp.dict new file mode 100644 index 0000000000000000000000000000000000000000..203cc1c9f498713682563f8263cf681a8bcb4e49 GIT binary patch literal 17 YcmdPcs{fZE!j*wRfPsOL!RP=J04P)gtN;K2 literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/var.dict 
b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/var.dict new file mode 100644 index 0000000000000000000000000000000000000000..2eefab01477dcdb29bc977313d7b9606c77a326a GIT binary patch literal 43 ucmZQ!fB=oH`hOWBt}-wbFf*j(=QA@TB^Ff`mR7JZG;mC}J#c~f022Vtj|;Z| literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/array.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/array.dict new file mode 100644 index 0000000000000000000000000000000000000000..1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e GIT binary patch literal 8 KcmZQzfB*mh2mk>9 literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/log.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/log.dict new file mode 100644 index 0000000000000000000000000000000000000000..d9e073f455a13474d299a33f03688e771165724e GIT binary patch literal 39 kcmZQ%fB=oH`hOWBHZm~qK?QP4b4oG`a#9t_^NUi70fI;f-2eap literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_ids b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_ids new file mode 100644 index 0000000000000000000000000000000000000000..b9ed98ce513b4dd296972ab809bf1b69cb377588 GIT binary patch literal 66 zcmdPcs{fZE;w&Qr3lj?i6C(oy12YQ?kYr_M1Cs1O%mHF>g0Z(k!s;4@`%5?7Q`vaB MT=liyX5KA~073~3Z2$lO literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_tree b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_tree new file mode 100644 index 0000000000000000000000000000000000000000..14d0b05abe3a4b6a05a8840628a75c7b4ce12c89 GIT binary patch literal 72 
zcmdPcs{fZE;x8kE2@@B?e;{CBWdH(3hD3&BR;E;Dh9pL&6c&a==A>knBqkn)2Im5U Z1JVY366`(9$C4+pGBzhVTw^}K1OUI-66pW{ literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/table_metadata b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/table_metadata new file mode 100644 index 0000000000000000000000000000000000000000..74749fb52d208d49cea0160dad1181277c870486 GIT binary patch literal 55 zcmdPcs{fZEqLh(g0}BHqg8&1gpd^#NAhV|g0|an0C~zCF8!<;VSWb{U@#%7mL4txI G&jtV!>!23TM%14D)+P#~uN&{vHZBL@a1hJ*)<35*cgP6i+! S0+^vRR0d=a3s4`|0VV+Gyb}`u literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/timestamp.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/timestamp.dict new file mode 100644 index 0000000000000000000000000000000000000000..203cc1c9f498713682563f8263cf681a8bcb4e49 GIT binary patch literal 17 YcmdPcs{fZE!j*wRfPsOL!RP=J04P)gtN;K2 literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/var.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/var.dict new file mode 100644 index 0000000000000000000000000000000000000000..e246a7ea76f8b3d84055d9ef42c41b010111c2f8 GIT binary patch literal 54 zcmZQ&fB=oH`hOWBbQu|Turhe0=H%qFGL+{R<)kn(q~+%)B^Fh&G9>Ve8yzr6n8BF9 F2mnPg4R8Pe literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/logs/test_1 b/presto-clp/src/test/resources/logs/test_1 new file mode 100644 index 0000000000000..885f19b5dfb59 --- /dev/null +++ b/presto-clp/src/test/resources/logs/test_1 @@ -0,0 +1,4 @@ +{"a": 1, "b": 2.0, "c": {"d": true, "e": "Hello world"}} +{"a": 2, "b": 3.0, "c": {"d": false, "e": "Goodbye world"}} +{"a": "foo", "b": "bar", "c": 2.0} +{"a": "baz", "b": "qux"} \ No newline at end of file diff --git 
a/presto-clp/src/test/resources/logs/test_2 b/presto-clp/src/test/resources/logs/test_2 new file mode 100644 index 0000000000000..ded0ccac0146a --- /dev/null +++ b/presto-clp/src/test/resources/logs/test_2 @@ -0,0 +1,4 @@ +{"a": 36, "c": {"e": "Hello"}} +{"a": 25, "b": 18.36, "c": {"d": false, "e": "world"}} +{"a": "foo", "b": "bar", "c": true} +{"a": "baz", "b": "multiple words"} \ No newline at end of file From 253cf7a80381a96cca702c70cfbd557aa769bbb4 Mon Sep 17 00:00:00 2001 From: wraymo Date: Sun, 9 Jun 2024 18:18:45 -0400 Subject: [PATCH 009/126] modify logs and add a test case --- .../com/yscope/presto/TestClpMetadata.java | 89 ++++++++++++++++-- .../array.dict | Bin .../log.dict | Bin .../schema_ids | Bin 0 -> 67 bytes .../schema_tree | Bin 0 -> 78 bytes .../table_metadata | Bin 0 -> 55 bytes .../tables | Bin 0 -> 102 bytes .../timestamp.dict | Bin .../var.dict | Bin .../schema_ids | Bin 66 -> 0 bytes .../schema_tree | Bin 72 -> 0 bytes .../table_metadata | Bin 55 -> 0 bytes .../tables | Bin 90 -> 0 bytes presto-clp/src/test/resources/logs/test_2 | 4 +- 14 files changed, 84 insertions(+), 9 deletions(-) rename presto-clp/src/test/resources/clp_archive/test_2_table/{e58fb658-db99-4424-a121-5ffce10f19fa => 74d74cae-adc3-4ab8-aab0-3aeb5104740b}/array.dict (100%) rename presto-clp/src/test/resources/clp_archive/test_2_table/{e58fb658-db99-4424-a121-5ffce10f19fa => 74d74cae-adc3-4ab8-aab0-3aeb5104740b}/log.dict (100%) create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_ids create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_tree create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/table_metadata create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/tables rename 
presto-clp/src/test/resources/clp_archive/test_2_table/{e58fb658-db99-4424-a121-5ffce10f19fa => 74d74cae-adc3-4ab8-aab0-3aeb5104740b}/timestamp.dict (100%) rename presto-clp/src/test/resources/clp_archive/test_2_table/{e58fb658-db99-4424-a121-5ffce10f19fa => 74d74cae-adc3-4ab8-aab0-3aeb5104740b}/var.dict (100%) delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_ids delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_tree delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/table_metadata delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/tables diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 401f766e076c6..38953444fba8b 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -38,7 +38,8 @@ public class TestClpMetadata @BeforeMethod public void setUp() { - ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive"); + ClpConfig config = + new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive").setPolymorphicTypeEnabled(true); metadata = new ClpMetadata(new ClpClient(config)); } @@ -63,13 +64,87 @@ public void testGetTable1Metadata() (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName("default", "test_1_table")); ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); HashSet columnMetadata = new HashSet<>(); - columnMetadata.add(ColumnMetadata.builder().setName("a").setType(IntegerType.INTEGER).setNullable(true).build()); - columnMetadata.add(ColumnMetadata.builder().setName("b").setType(DoubleType.DOUBLE).setNullable(true).build()); - 
columnMetadata.add(ColumnMetadata.builder().setName("c.d").setType(BooleanType.BOOLEAN).setNullable(true).build()); - columnMetadata.add(ColumnMetadata.builder().setName("c.e").setType(VarcharType.VARCHAR).setNullable(true).build()); - columnMetadata.add(ColumnMetadata.builder().setName("a").setType(VarcharType.VARCHAR).setNullable(true).build()); - columnMetadata.add(ColumnMetadata.builder().setName("b").setType(VarcharType.VARCHAR).setNullable(true).build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("a_integer") + .setType(IntegerType.INTEGER) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("b_double") + .setType(DoubleType.DOUBLE) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("c.d") + .setType(BooleanType.BOOLEAN) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("c.e") + .setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("a_varchar") + .setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("b_varchar") + .setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); columnMetadata.add(ColumnMetadata.builder().setName("c").setType(DoubleType.DOUBLE).setNullable(true).build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } + + @Test + public void testGetTable2Metadata() + { + ClpTableHandle clpTableHandle = + (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName("default", "test_2_table")); + ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); + HashSet columnMetadata = new HashSet<>(); + columnMetadata.add(ColumnMetadata.builder() + .setName("a_double") + .setType(DoubleType.DOUBLE) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("b.c") + 
.setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("b_integer") + .setType(IntegerType.INTEGER) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("c.d") + .setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("c.e") + .setType(BooleanType.BOOLEAN) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("a_varchar") + .setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("b_varchar") + .setType(VarcharType.VARCHAR) + .setNullable(true) + .build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("c") + .setType(BooleanType.BOOLEAN) + .setNullable(true) + .build()); + assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); + } } diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/array.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/array.dict similarity index 100% rename from presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/array.dict rename to presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/array.dict diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/log.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/log.dict similarity index 100% rename from presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/log.dict rename to presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/log.dict diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_ids 
b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_ids new file mode 100644 index 0000000000000000000000000000000000000000..52f098ca733b93ba4d0d32b5ee9722ff2e884618 GIT binary patch literal 67 zcmdPcs{fZE;wmG92onnf6C(oy12YRF8<1jW<^YnMK+FYVaDzGC3JI%g6z(tGv@T`a O=_J+HdYgH-FaiKg3JsJ1 literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_tree b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_tree new file mode 100644 index 0000000000000000000000000000000000000000..1b79f476c5adee39347dc04e806d3e02d07551c8 GIT binary patch literal 78 zcmdPcs{fZELYIl5go%gYKM*jmG5`T1Ln31mD^oHvLlQ$WD{BgKDhop*b5b%(5)&^& fgL8qw0ciu72fPw|AJ}@B+qzFmOGu~KGUxyR4JH$s literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/table_metadata b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/table_metadata new file mode 100644 index 0000000000000000000000000000000000000000..2788e97d9953cfd1ed5b998649b89c188b7414e9 GIT binary patch literal 55 zcmdPcs{fZEqLh(g0}BHqg8&1gq$HEIAake$0|an0C~zCF8!<;VSWb{U@#%7mL4txI G&jtV(KnrmI literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/tables b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/tables new file mode 100644 index 0000000000000000000000000000000000000000..5b0c0a51c215ffa5415fce40eba0d4cdf0c6f3e9 GIT binary patch literal 102 zcmdPcs{fZEqLG1No%_QokvJy@1}M+~OPpk2(7&s~)_B#@K?o|rh^nHKfq@As!VFUZ R(d){_A;65s#; literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/timestamp.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/timestamp.dict 
similarity index 100% rename from presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/timestamp.dict rename to presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/timestamp.dict diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/var.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/var.dict similarity index 100% rename from presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/var.dict rename to presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/var.dict diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_ids b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_ids deleted file mode 100644 index b9ed98ce513b4dd296972ab809bf1b69cb377588..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 66 zcmdPcs{fZE;w&Qr3lj?i6C(oy12YQ?kYr_M1Cs1O%mHF>g0Z(k!s;4@`%5?7Q`vaB MT=liyX5KA~073~3Z2$lO diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_tree b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/schema_tree deleted file mode 100644 index 14d0b05abe3a4b6a05a8840628a75c7b4ce12c89..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 72 zcmdPcs{fZE;x8kE2@@B?e;{CBWdH(3hD3&BR;E;Dh9pL&6c&a==A>knBqkn)2Im5U Z1JVY366`(9$C4+pGBzhVTw^}K1OUI-66pW{ diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/table_metadata b/presto-clp/src/test/resources/clp_archive/test_2_table/e58fb658-db99-4424-a121-5ffce10f19fa/table_metadata deleted file mode 100644 index 
74749fb52d208d49cea0160dad1181277c870486..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 55 zcmdPcs{fZEqLh(g0}BHqg8&1gpd^#NAhV|g0|an0C~zCF8!<;VSWb{U@#%7mL4txI G&jtV!>!23TM%14D)+P#~uN&{vHZBL@a1hJ*)<35*cgP6i+! S0+^vRR0d=a3s4`|0VV+Gyb}`u diff --git a/presto-clp/src/test/resources/logs/test_2 b/presto-clp/src/test/resources/logs/test_2 index ded0ccac0146a..a011c4b41301f 100644 --- a/presto-clp/src/test/resources/logs/test_2 +++ b/presto-clp/src/test/resources/logs/test_2 @@ -1,4 +1,4 @@ -{"a": 36, "c": {"e": "Hello"}} -{"a": 25, "b": 18.36, "c": {"d": false, "e": "world"}} +{"a": 36.735, "b": {"c": "Hello"}} +{"a": 25.834, "b": 18, "c": {"d": "world", "e": false}} {"a": "foo", "b": "bar", "c": true} {"a": "baz", "b": "multiple words"} \ No newline at end of file From 6f5ddba06a678ee0a32d50879fd0602a8df0664c Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Jun 2024 11:48:25 -0400 Subject: [PATCH 010/126] add decompression --- .../java/com/yscope/presto/ClpClient.java | 93 ++++++++++++++++++- .../java/com/yscope/presto/ClpConfig.java | 26 ++++++ .../com/yscope/presto/ClpRecordCursor.java | 20 ++++ .../java/com/yscope/presto/ClpRecordSet.java | 13 ++- .../yscope/presto/ClpRecordSetProvider.java | 13 ++- .../main/java/com/yscope/presto/ClpSplit.java | 28 ++++++ 6 files changed, 185 insertions(+), 8 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 1af8a7271d62a..64715bd7d16f1 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -13,6 +13,7 @@ */ package com.yscope.presto; +import com.facebook.airlift.log.Logger; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.IntegerType; @@ -24,10 +25,10 @@ import com.yscope.presto.schema.SchemaNode; import 
com.yscope.presto.schema.SchemaTree; +import java.io.BufferedReader; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; -import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; @@ -42,17 +43,56 @@ import java.util.Map; import java.util.Set; +import static java.util.Objects.requireNonNull; + public class ClpClient { + private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; + private final Path executablePath; private final Map> tableNameToColumnHandles; private Set tableNames; @Inject public ClpClient(ClpConfig config) { - this.config = config; + this.config = requireNonNull(config, "config is null"); this.tableNameToColumnHandles = new HashMap<>(); + this.executablePath = getExecutablePath(); + } + + private Path getExecutablePath() + { + String executablePathString = config.getClpExecutablePath(); + if (executablePathString == null || executablePathString.isEmpty()) { + Path executablePath = getExecutablePathFromEnvironment(); + if (executablePath == null) { + throw new RuntimeException("CLP executable path is not set"); + } + return executablePath; + } + Path executablePath = Paths.get(executablePathString); + if (!Files.exists(executablePath) || !Files.isRegularFile(executablePath)) { + executablePath = getExecutablePathFromEnvironment(); + if (executablePath == null) { + throw new RuntimeException("CLP executable path is not set"); + } + } + return executablePath; + } + + private Path getExecutablePathFromEnvironment() + { + String executablePathString = System.getenv("CLP_EXECUTABLE_PATH"); + if (executablePathString == null || executablePathString.isEmpty()) { + return null; + } + + Path executablePath = Paths.get(executablePathString); + if (!Files.exists(executablePath) || !Files.isRegularFile(executablePath)) { + return null; + } + return executablePath; } public Set listTables() @@ -60,10 +100,14 @@ public Set 
listTables() if (tableNames != null) { return tableNames; } - System.out.println("Working Directory = " + System.getProperty("user.dir")); + if (config.getClpArchiveDir() == null || config.getClpArchiveDir().isEmpty()) { + tableNames = ImmutableSet.of(); + return tableNames; + } Path archiveDir = Paths.get(config.getClpArchiveDir()); if (!Files.exists(archiveDir) || !Files.isDirectory(archiveDir)) { - return ImmutableSet.of(); + tableNames = ImmutableSet.of(); + return tableNames; } try (DirectoryStream stream = Files.newDirectoryStream(archiveDir)) { @@ -94,7 +138,6 @@ public Set listColumns(String tableName) } try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { - ImmutableSet.Builder columnNames = ImmutableSet.builder(); for (Path path : stream) { if (Files.isRegularFile(path)) { continue; @@ -123,6 +166,46 @@ public Set listColumns(String tableName) return polymorphicColumnHandles; } + public BufferedReader getRecords(String tableName) + { + if (!listTables().contains(tableName)) { + return null; + } + + Path decompressFile = Paths.get(config.getClpDecompressDir(), tableName, "original"); + if (!Files.exists(decompressFile) || !Files.isRegularFile(decompressFile)) { + if (!DecompressRecords(tableName)) { + return null; + } + } + + try { + return Files.newBufferedReader(decompressFile); + } + catch (IOException e) { + log.error(e, "Failed to get records for table %s", tableName); + return null; + } + } + + private boolean DecompressRecords(String tableName) + { + Path decompressDir = Paths.get(config.getClpDecompressDir(), tableName); + Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); + + try { + ProcessBuilder processBuilder = + new ProcessBuilder(executablePath.toString(), "x", tableDir.toString(), decompressDir.toString()); + Process process = processBuilder.start(); + process.waitFor(); + return process.exitValue() == 0; + } + catch (IOException | InterruptedException e) { + log.error(e, "Failed to decompress records for table 
%s", tableName); + return false; + } + } + private Set parseSchemaTreeFile(Path schemaMapsFile) { SchemaTree schemaTree = new SchemaTree(); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 1745a7231f95a..f3bd011a696a7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -17,9 +17,23 @@ public class ClpConfig { + private String clpExecutablePath; private String clpArchiveDir; + private String clpDecompressDir; private boolean polymorphicTypeEnabled; + public String getClpExecutablePath() + { + return clpExecutablePath; + } + + @Config("executable-path") + public ClpConfig setClpExecutablePath(String clpExecutablePath) + { + this.clpExecutablePath = clpExecutablePath; + return this; + } + public String getClpArchiveDir() { return clpArchiveDir; @@ -32,6 +46,18 @@ public ClpConfig setClpArchiveDir(String clpArchiveDir) return this; } + public String getClpDecompressDir() + { + return clpDecompressDir; + } + + @Config("decompress-dir") + public ClpConfig setClpDecompressDir(String clpDecompressDir) + { + this.clpDecompressDir = clpDecompressDir; + return this; + } + public boolean isPolymorphicTypeEnabled() { return polymorphicTypeEnabled; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index 4c0715286db47..f7af0661cd7ef 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -17,9 +17,20 @@ import com.facebook.presto.spi.RecordCursor; import io.airlift.slice.Slice; +import java.io.BufferedReader; +import java.util.List; + public class ClpRecordCursor implements RecordCursor { + private final BufferedReader reader; + private final List columnHandles; + + public ClpRecordCursor(BufferedReader reader, List 
columnHandles) + { + this.reader = reader; + this.columnHandles = columnHandles; + } @Override public long getCompletedBytes() { @@ -41,6 +52,15 @@ public Type getType(int field) @Override public boolean advanceNextPosition() { + try { + if (reader.readLine() == null) { + return false; + } + } + catch (Exception e) { + return false; + } + return false; } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java index 03c2d6c51a415..e6bbf75972a92 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java @@ -17,11 +17,22 @@ import com.facebook.presto.spi.RecordCursor; import com.facebook.presto.spi.RecordSet; +import java.io.BufferedReader; import java.util.List; +import static java.util.Objects.requireNonNull; + public class ClpRecordSet implements RecordSet { + private final BufferedReader reader; + private final List columnHandles; + public ClpRecordSet(BufferedReader reader, List columnHandles) + { + this.reader = requireNonNull(reader, "reader is null"); + this.columnHandles = requireNonNull(columnHandles, "column handles is null"); + } + @Override public List getColumnTypes() { @@ -31,6 +42,6 @@ public List getColumnTypes() @Override public RecordCursor cursor() { - return null; + return new ClpRecordCursor(reader, columnHandles); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index 1815a1a9c92ba..caba8f9ee5030 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -19,6 +19,7 @@ import com.facebook.presto.spi.RecordSet; import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import 
com.google.common.collect.ImmutableList; import javax.inject.Inject; @@ -27,9 +28,12 @@ public class ClpRecordSetProvider implements ConnectorRecordSetProvider { + ClpClient clpClient; + @Inject - public ClpRecordSetProvider() + public ClpRecordSetProvider(ClpClient clpClient) { + this.clpClient = clpClient; } @Override @@ -38,6 +42,11 @@ public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSplit split, List columns) { - return null; + ClpSplit clpSplit = (ClpSplit) split; + ImmutableList.Builder handles = ImmutableList.builder(); + for (ColumnHandle handle : columns) { + handles.add((ClpColumnHandle) handle); + } + return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName()), handles.build()); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index 23adf54e9f19c..c04a1401efb04 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -17,8 +17,12 @@ import com.facebook.presto.spi.HostAddress; import com.facebook.presto.spi.NodeProvider; import com.facebook.presto.spi.schedule.NodeSelectionStrategy; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; +import javax.annotation.Nullable; + import java.util.List; import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE; @@ -26,6 +30,30 @@ public class ClpSplit implements ConnectorSplit { + private final String schemaName; + private final String tableName; + + @JsonCreator + public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, + @JsonProperty("tableName") @Nullable String tableName) + { + this.schemaName = schemaName; + this.tableName = tableName; + } + + @JsonProperty + @Nullable + public String getSchemaName() + { + return schemaName; + } + + @JsonProperty + public String 
getTableName() + { + return tableName; + } + @Override public NodeSelectionStrategy getNodeSelectionStrategy() { From 4866a10884ff1735415a816cc33fc74f7776b3b9 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Jun 2024 14:09:51 -0400 Subject: [PATCH 011/126] add record cursor impl --- .../java/com/yscope/presto/ClpClient.java | 5 + .../com/yscope/presto/ClpRecordCursor.java | 104 ++++++++++++++++-- .../java/com/yscope/presto/ClpRecordSet.java | 7 +- .../yscope/presto/ClpRecordSetProvider.java | 4 +- .../com/yscope/presto/TestClpMetadata.java | 16 ++- .../com/yscope/presto/TestRecordCursor.java | 18 +++ 6 files changed, 141 insertions(+), 13 deletions(-) create mode 100644 presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 64715bd7d16f1..5c3c94d8016a9 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -61,6 +61,11 @@ public ClpClient(ClpConfig config) this.executablePath = getExecutablePath(); } + public ClpConfig getConfig() + { + return config; + } + private Path getExecutablePath() { String executablePathString = config.getClpExecutablePath(); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index f7af0661cd7ef..d2c94a46aa53e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -15,22 +15,44 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.RecordCursor; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import io.airlift.slice.Slice; +import io.airlift.slice.Slices; import java.io.BufferedReader; +import java.util.ArrayList; +import java.util.Iterator; import 
java.util.List; +import java.util.Map; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType; +import static com.google.common.base.Preconditions.checkArgument; public class ClpRecordCursor implements RecordCursor { private final BufferedReader reader; + private final boolean isPolymorphicTypeEnabled; private final List columnHandles; + private final List fields; - public ClpRecordCursor(BufferedReader reader, List columnHandles) + public ClpRecordCursor(BufferedReader reader, boolean isPolymorphicTypeEnabled, List columnHandles) { this.reader = reader; + this.isPolymorphicTypeEnabled = isPolymorphicTypeEnabled; this.columnHandles = columnHandles; + this.fields = new ArrayList<>(columnHandles.size()); + for (int i = 0; i < columnHandles.size(); i++) { + fields.add(null); + } } + @Override public long getCompletedBytes() { @@ -46,16 +68,20 @@ public long getReadTimeNanos() @Override public Type getType(int field) { - return null; + return columnHandles.get(field).getColumnType(); } @Override public boolean advanceNextPosition() { try { - if (reader.readLine() == null) { + String line = reader.readLine(); + if (line == null) { return false; } + fields.replaceAll(ignored -> null); + JsonNode node = new ObjectMapper().readTree(line); + parseLine(node, ""); } catch (Exception e) { return false; @@ -64,28 +90,38 @@ public boolean advanceNextPosition() return false; } + private void checkFieldType(int field, Type expected) + { + Type actual = getType(field); + checkArgument(actual.equals(expected), "Expected field %s to be type %s but is %s", field, expected, actual); + } + @Override public boolean 
getBoolean(int field) { - return false; + checkFieldType(field, BOOLEAN); + return Boolean.parseBoolean(fields.get(field)); } @Override public long getLong(int field) { - return 0; + checkFieldType(field, BIGINT); + return Long.parseLong(fields.get(field)); } @Override public double getDouble(int field) { - return 0; + checkFieldType(field, DOUBLE); + return Double.parseDouble(fields.get(field)); } @Override public Slice getSlice(int field) { - return null; + checkFieldType(field, createUnboundedVarcharType()); + return Slices.utf8Slice(fields.get(field)); } @Override @@ -97,11 +133,63 @@ public Object getObject(int field) @Override public boolean isNull(int field) { - return false; + return fields.get(field) == null; } @Override public void close() { } + + private void parseLine(JsonNode node, String prefix) + { + if (node.isObject()) { + Iterator> fields = node.fields(); + while (fields.hasNext()) { + Map.Entry field = fields.next(); + String key = field.getKey(); + JsonNode value = field.getValue(); + parseLine(value, prefix.isEmpty() ? key : prefix + "." 
+ key); + } + } + else { + int index = getFieldIndex(prefix, node); + if (index == -1) { + return; + } + fields.set(index, node.toString()); + } + } + + private String jsonNodeToTypeString(JsonNode node) + { + if (node.isIntegralNumber()) { + return INTEGER.getDisplayName(); + } + if (node.isFloatingPointNumber()) { + return DOUBLE.getDisplayName(); + } + if (node.isBoolean()) { + return BOOLEAN.getDisplayName(); + } + if (node.isTextual()) { + return VARCHAR.getDisplayName(); + } + return "unknown"; + } + + private int getFieldIndex(String fieldName, JsonNode node) + { + for (int i = 0; i < columnHandles.size(); i++) { + if (columnHandles.get(i).getColumnName().equals(fieldName)) { + return i; + } + + if (isPolymorphicTypeEnabled && (fieldName + "_" + jsonNodeToTypeString(node)).equals(columnHandles.get(i) + .getColumnName())) { + return i; + } + } + return -1; + } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java index e6bbf75972a92..245ee4eba3771 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java @@ -27,9 +27,12 @@ public class ClpRecordSet { private final BufferedReader reader; private final List columnHandles; - public ClpRecordSet(BufferedReader reader, List columnHandles) + private final boolean isPolymorphicTypeEnabled; + + public ClpRecordSet(BufferedReader reader, boolean isPolymorphicTypeEnabled, List columnHandles) { this.reader = requireNonNull(reader, "reader is null"); + this.isPolymorphicTypeEnabled = isPolymorphicTypeEnabled; this.columnHandles = requireNonNull(columnHandles, "column handles is null"); } @@ -42,6 +45,6 @@ public List getColumnTypes() @Override public RecordCursor cursor() { - return new ClpRecordCursor(reader, columnHandles); + return new ClpRecordCursor(reader, isPolymorphicTypeEnabled, columnHandles); } } diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index caba8f9ee5030..68f928a739fed 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -47,6 +47,8 @@ public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, for (ColumnHandle handle : columns) { handles.add((ClpColumnHandle) handle); } - return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName()), handles.build()); + return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName()), + clpClient.getConfig().isPolymorphicTypeEnabled(), + handles.build()); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 38953444fba8b..8de51913827ef 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -24,6 +24,7 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.HashSet; import java.util.Optional; @@ -38,8 +39,9 @@ public class TestClpMetadata @BeforeMethod public void setUp() { - ClpConfig config = - new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive").setPolymorphicTypeEnabled(true); + ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive") + .setPolymorphicTypeEnabled(true) + .setClpExecutablePath("/usr/local/bin/clp-s"); metadata = new ClpMetadata(new ClpClient(config)); } @@ -147,4 +149,14 @@ public void testGetTable2Metadata() .build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } + + @Test + public void testGetTableHandle() + { + ArrayList t = new ArrayList(2); + if (t.get(0) == null) { + System.out.println("null"); + } + t.set(0, "default"); + } } 
diff --git a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java new file mode 100644 index 0000000000000..6de25c49ac626 --- /dev/null +++ b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java @@ -0,0 +1,18 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +public class TestRecordCursor +{ +} From 409c3e837ebedfbad840bbe4e3c10e41e1c4451c Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Jun 2024 15:10:31 -0400 Subject: [PATCH 012/126] store decompressed logs to tmpdir --- presto-clp/pom.xml | 16 ++++++++ .../java/com/yscope/presto/ClpClient.java | 38 +++++++++++++++++-- .../java/com/yscope/presto/ClpConfig.java | 13 ------- .../java/com/yscope/presto/ClpModule.java | 2 +- .../com/yscope/presto/TestRecordCursor.java | 13 +++++++ 5 files changed, 64 insertions(+), 18 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 61c8d380826b3..749ef6ad3d527 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -45,6 +45,17 @@ guice + + javax.annotation + javax.annotation-api + + + + com.google.code.findbugs + jsr305 + true + + com.google.guava guava @@ -61,6 +72,11 @@ provided + + com.fasterxml.jackson.core + jackson-databind + + com.github.luben zstd-jni diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 
5c3c94d8016a9..6330c7ae57544 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -25,6 +25,9 @@ import com.yscope.presto.schema.SchemaNode; import com.yscope.presto.schema.SchemaTree; +import javax.annotation.PostConstruct; +import javax.annotation.PreDestroy; + import java.io.BufferedReader; import java.io.DataInputStream; import java.io.IOException; @@ -51,6 +54,7 @@ public class ClpClient private final ClpConfig config; private final Path executablePath; private final Map> tableNameToColumnHandles; + private final Path decompressDir; private Set tableNames; @Inject @@ -59,6 +63,29 @@ public ClpClient(ClpConfig config) this.config = requireNonNull(config, "config is null"); this.tableNameToColumnHandles = new HashMap<>(); this.executablePath = getExecutablePath(); + this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); + } + + @PostConstruct + public void start() + { + try { + Files.createDirectories(decompressDir); + } + catch (IOException e) { + log.error(e, "Failed to create decompression directory"); + } + } + + @PreDestroy + public void close() + { + try { + Files.deleteIfExists(decompressDir); + } + catch (IOException e) { + log.error(e, "Failed to delete decompression directory"); + } } public ClpConfig getConfig() @@ -177,7 +204,7 @@ public BufferedReader getRecords(String tableName) return null; } - Path decompressFile = Paths.get(config.getClpDecompressDir(), tableName, "original"); + Path decompressFile = decompressDir.resolve(tableName).resolve("original"); if (!Files.exists(decompressFile) || !Files.isRegularFile(decompressFile)) { if (!DecompressRecords(tableName)) { return null; @@ -195,12 +222,15 @@ public BufferedReader getRecords(String tableName) private boolean DecompressRecords(String tableName) { - Path decompressDir = Paths.get(config.getClpDecompressDir(), tableName); - Path tableDir = Paths.get(config.getClpArchiveDir(), 
tableName); + Path tableDecompressDir = decompressDir.resolve(tableName); + Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); try { ProcessBuilder processBuilder = - new ProcessBuilder(executablePath.toString(), "x", tableDir.toString(), decompressDir.toString()); + new ProcessBuilder(executablePath.toString(), + "x", + tableArchiveDir.toString(), + tableDecompressDir.toString()); Process process = processBuilder.start(); process.waitFor(); return process.exitValue() == 0; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index f3bd011a696a7..dfd39128d50bf 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -19,7 +19,6 @@ public class ClpConfig { private String clpExecutablePath; private String clpArchiveDir; - private String clpDecompressDir; private boolean polymorphicTypeEnabled; public String getClpExecutablePath() @@ -46,18 +45,6 @@ public ClpConfig setClpArchiveDir(String clpArchiveDir) return this; } - public String getClpDecompressDir() - { - return clpDecompressDir; - } - - @Config("decompress-dir") - public ClpConfig setClpDecompressDir(String clpDecompressDir) - { - this.clpDecompressDir = clpDecompressDir; - return this; - } - public boolean isPolymorphicTypeEnabled() { return polymorphicTypeEnabled; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java index 59ccd5925f7b9..643009b9dbefb 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java @@ -29,7 +29,7 @@ public void configure(Binder binder) binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); - // TODO: bind ClpClient + 
binder.bind(ClpClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java index 6de25c49ac626..c558b4022475e 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java @@ -13,6 +13,19 @@ */ package com.yscope.presto; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test(singleThreaded = true) public class TestRecordCursor { + private ClpClient clpClient; + @BeforeMethod + public void setUp() + { + ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive") + .setPolymorphicTypeEnabled(true) + .setClpExecutablePath("/usr/local/bin/clp-s"); + clpClient = new ClpClient(config); + } } From 6450e87313aea0ff185aa0e3b0f64ce3662460b3 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Jun 2024 16:04:17 -0400 Subject: [PATCH 013/126] and tablelayouthandle and add a basic record cursor test --- .../java/com/yscope/presto/ClpClient.java | 6 +-- .../java/com/yscope/presto/ClpMetadata.java | 17 +++++++- .../java/com/yscope/presto/ClpRecordSet.java | 3 +- .../com/yscope/presto/ClpSplitManager.java | 21 +++++++++- .../yscope/presto/ClpTableLayoutHandle.java | 42 +++++++++++++++++++ .../com/yscope/presto/TestRecordCursor.java | 30 +++++++++++++ 6 files changed, 113 insertions(+), 6 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 6330c7ae57544..9b29dd601abea 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -21,12 +21,12 @@ import com.facebook.presto.common.type.VarcharType; import com.github.luben.zstd.ZstdInputStream; import com.google.common.collect.ImmutableSet; -import 
com.google.inject.Inject; import com.yscope.presto.schema.SchemaNode; import com.yscope.presto.schema.SchemaTree; import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; +import javax.inject.Inject; import java.io.BufferedReader; import java.io.DataInputStream; @@ -245,8 +245,8 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) { SchemaTree schemaTree = new SchemaTree(); try (InputStream fileInputStream = Files.newInputStream(schemaMapsFile); - ZstdInputStream zstdInputStream = new ZstdInputStream(fileInputStream); - DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { + ZstdInputStream zstdInputStream = new ZstdInputStream(fileInputStream); + DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { byte[] longBytes = new byte[8]; byte[] intBytes = new byte[4]; dataInputStream.readFully(longBytes); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index f1de4f8728aa4..fec925782d690 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -19,17 +19,21 @@ import com.facebook.presto.spi.ConnectorTableHandle; import com.facebook.presto.spi.ConnectorTableLayout; import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.ConnectorTableLayoutResult; import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.Constraint; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.SchemaTablePrefix; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; + +import javax.inject.Inject; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; public class ClpMetadata implements ConnectorMetadata @@ 
-72,6 +76,17 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable return new ClpTableHandle(tableName.getTableName()); } + @Override + public List getTableLayouts(ConnectorSession session, + ConnectorTableHandle table, + Constraint constraint, + Optional> desiredColumns) + { + ClpTableHandle tableHandle = (ClpTableHandle) table; + ConnectorTableLayout layout = new ConnectorTableLayout(new ClpTableLayoutHandle(tableHandle)); + return ImmutableList.of(new ConnectorTableLayoutResult(layout, constraint.getSummary())); + } + @Override public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle handle) { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java index 245ee4eba3771..990cfcc61dac2 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java @@ -16,6 +16,7 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.RecordCursor; import com.facebook.presto.spi.RecordSet; +import com.google.common.collect.ImmutableList; import java.io.BufferedReader; import java.util.List; @@ -39,7 +40,7 @@ public ClpRecordSet(BufferedReader reader, boolean isPolymorphicTypeEnabled, Lis @Override public List getColumnTypes() { - return null; + return columnHandles.stream().map(ClpColumnHandle::getColumnType).collect(ImmutableList.toImmutableList()); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index d4849580f2f6d..1fb26c22970b1 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -14,20 +14,39 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorSplit; import 
com.facebook.presto.spi.ConnectorSplitSource; import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.FixedSplitSource; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import java.util.Collections; +import java.util.List; + public class ClpSplitManager implements ConnectorSplitManager { + private final ClpClient clpClient; + + public ClpSplitManager(ClpClient clpClient) + { + this.clpClient = clpClient; + } + @Override public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout, SplitSchedulingContext splitSchedulingContext) { - return null; + ClpTableLayoutHandle layoutHandle = (ClpTableLayoutHandle) layout; + ClpTableHandle tableHandle = layoutHandle.getTable(); + if (!clpClient.listTables().contains(tableHandle.getTableName())) { + throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); + } + List splits = Collections.singletonList(new ClpSplit("default", tableHandle.getTableName())); + + return new FixedSplitSource(splits); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java index d44c82aaa31f5..9a5f51b4e1b68 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java @@ -14,8 +14,50 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; public class ClpTableLayoutHandle implements ConnectorTableLayoutHandle { + private final ClpTableHandle table; + + @JsonCreator + public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table) + { + this.table = table; + 
} + + @JsonProperty + public ClpTableHandle getTable() + { + return table; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ClpTableLayoutHandle that = (ClpTableLayoutHandle) o; + return Objects.equals(table, that.table); + } + + @Override + public int hashCode() + { + return Objects.hash(table); + } + + @Override + public String toString() + { + return table.toString(); + } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java index c558b4022475e..a5efb0e879d43 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java @@ -13,13 +13,20 @@ */ package com.yscope.presto; +import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.util.ArrayList; + +import static com.facebook.presto.testing.TestingConnectorSession.SESSION; +import static org.testng.Assert.assertNotNull; + @Test(singleThreaded = true) public class TestRecordCursor { private ClpClient clpClient; + @BeforeMethod public void setUp() { @@ -27,5 +34,28 @@ public void setUp() .setPolymorphicTypeEnabled(true) .setClpExecutablePath("/usr/local/bin/clp-s"); clpClient = new ClpClient(config); + clpClient.start(); + } + + @Test + public void testRecordCursor() + { + ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); + ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( + ClpTransactionHandle.INSTANCE, + SESSION, + new ClpSplit("default", "test_1_table"), + new ArrayList<>(clpClient.listColumns("test_1_table"))); + assertNotNull(recordSet, "recordSet is null"); + ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); + assertNotNull(cursor, "cursor is null"); + cursor.advanceNextPosition(); 
+ cursor.advanceNextPosition(); + } + + @AfterMethod + public void tearDown() + { + clpClient.close(); } } From bfe9b7efcdfb9d62cb4460655761318204deaaad Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Jun 2024 16:55:16 -0400 Subject: [PATCH 014/126] make iteration predictable --- .../java/com/yscope/presto/ClpClient.java | 37 +++++++++++++++---- .../com/yscope/presto/ClpRecordCursor.java | 15 ++++---- .../com/yscope/presto/TestRecordCursor.java | 6 +++ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 9b29dd601abea..1ac49832fe382 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -36,12 +36,15 @@ import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; +import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -81,7 +84,27 @@ public void start() public void close() { try { - Files.deleteIfExists(decompressDir); + Files.walkFileTree(decompressDir, new SimpleFileVisitor() + { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException + { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException + { + if (exc == null) { + Files.delete(dir); + return FileVisitResult.CONTINUE; + } + else { + throw exc; // Directory iteration failed + } + } + }); } catch (IOException e) { log.error(e, "Failed to delete 
decompression directory"); @@ -164,7 +187,7 @@ public Set listColumns(String tableName) } Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); - HashSet columnHandles = new HashSet<>(); + LinkedHashSet columnHandles = new LinkedHashSet<>(); if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { return ImmutableSet.of(); } @@ -245,8 +268,8 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) { SchemaTree schemaTree = new SchemaTree(); try (InputStream fileInputStream = Files.newInputStream(schemaMapsFile); - ZstdInputStream zstdInputStream = new ZstdInputStream(fileInputStream); - DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { + ZstdInputStream zstdInputStream = new ZstdInputStream(fileInputStream); + DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { byte[] longBytes = new byte[8]; byte[] intBytes = new byte[4]; dataInputStream.readFully(longBytes); @@ -264,7 +287,7 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) } ArrayList primitiveTypeFields = schemaTree.getPrimitiveFields(); - HashSet columnHandles = new HashSet<>(); + LinkedHashSet columnHandles = new LinkedHashSet<>(); for (SchemaNode.NodeTuple nodeTuple : primitiveTypeFields) { SchemaNode.NodeType nodeType = nodeTuple.getType(); Type prestoType = null; @@ -298,7 +321,7 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) private Set handlePolymorphicType(Set columnHandles) { Map> columnNameToColumnHandles = new HashMap<>(); - Set polymorphicColumnHandles = new HashSet<>(); + LinkedHashSet polymorphicColumnHandles = new LinkedHashSet<>(); for (ClpColumnHandle columnHandle : columnHandles) { columnNameToColumnHandles.computeIfAbsent(columnHandle.getColumnName(), k -> new ArrayList<>()) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index d2c94a46aa53e..338e6c0371317 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Map; -import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.IntegerType.INTEGER; @@ -40,7 +39,7 @@ public class ClpRecordCursor private final BufferedReader reader; private final boolean isPolymorphicTypeEnabled; private final List columnHandles; - private final List fields; + private final List fields; public ClpRecordCursor(BufferedReader reader, boolean isPolymorphicTypeEnabled, List columnHandles) { @@ -100,28 +99,28 @@ private void checkFieldType(int field, Type expected) public boolean getBoolean(int field) { checkFieldType(field, BOOLEAN); - return Boolean.parseBoolean(fields.get(field)); + return fields.get(field).asBoolean(); } @Override public long getLong(int field) { - checkFieldType(field, BIGINT); - return Long.parseLong(fields.get(field)); + checkFieldType(field, INTEGER); + return fields.get(field).asLong(); } @Override public double getDouble(int field) { checkFieldType(field, DOUBLE); - return Double.parseDouble(fields.get(field)); + return fields.get(field).asDouble(); } @Override public Slice getSlice(int field) { checkFieldType(field, createUnboundedVarcharType()); - return Slices.utf8Slice(fields.get(field)); + return Slices.utf8Slice(fields.get(field).asText()); } @Override @@ -157,7 +156,7 @@ private void parseLine(JsonNode node, String prefix) if (index == -1) { return; } - fields.set(index, node.toString()); + fields.set(index, node); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java index a5efb0e879d43..82156236646ed 100644 --- 
a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java @@ -20,7 +20,9 @@ import java.util.ArrayList; import static com.facebook.presto.testing.TestingConnectorSession.SESSION; +import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) public class TestRecordCursor @@ -50,6 +52,10 @@ public void testRecordCursor() ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); assertNotNull(cursor, "cursor is null"); cursor.advanceNextPosition(); + assertEquals(cursor.getLong(0), 1); + assertEquals(cursor.getDouble(2), 2.0); + assertTrue(cursor.getBoolean(5)); + assertEquals(cursor.getSlice(6).toStringUtf8(), "Hello world"); cursor.advanceNextPosition(); } From 383efe9de06d91cf7fdad84ad030b25de4778d6b Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Jun 2024 17:03:17 -0400 Subject: [PATCH 015/126] clean the code --- .../com/yscope/presto/TestClpMetadata.java | 38 +++++++------------ ...rdCursor.java => TestClpRecordCursor.java} | 2 +- 2 files changed, 15 insertions(+), 25 deletions(-) rename presto-clp/src/test/java/com/yscope/presto/{TestRecordCursor.java => TestClpRecordCursor.java} (98%) diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 8de51913827ef..261da970c0f01 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -72,27 +72,27 @@ public void testGetTable1Metadata() .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("b_double") - .setType(DoubleType.DOUBLE) + .setName("a_varchar") + .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c.d") - .setType(BooleanType.BOOLEAN) + 
.setName("b_double") + .setType(DoubleType.DOUBLE) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c.e") + .setName("b_varchar") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("a_varchar") - .setType(VarcharType.VARCHAR) + .setName("c.d") + .setType(BooleanType.BOOLEAN) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("b_varchar") + .setName("c.e") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); @@ -113,7 +113,7 @@ public void testGetTable2Metadata() .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("b.c") + .setName("a_varchar") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); @@ -123,40 +123,30 @@ public void testGetTable2Metadata() .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c.d") + .setName("b_varchar") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c.e") + .setName("c") .setType(BooleanType.BOOLEAN) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("a_varchar") + .setName("b.c") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("b_varchar") + .setName("c.d") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c") + .setName("c.e") .setType(BooleanType.BOOLEAN) .setNullable(true) .build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } - - @Test - public void testGetTableHandle() - { - ArrayList t = new ArrayList(2); - if (t.get(0) == null) { - System.out.println("null"); - } - t.set(0, "default"); - } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java similarity index 
98% rename from presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java rename to presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 82156236646ed..7416073ac4a24 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -25,7 +25,7 @@ import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) -public class TestRecordCursor +public class TestClpRecordCursor { private ClpClient clpClient; From 8c2574e23cedb23ac95997e4b01b99cd86446992 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 11 Jun 2024 09:42:45 -0400 Subject: [PATCH 016/126] fix a test case --- .../src/main/java/com/yscope/presto/ClpMetadata.java | 8 +++----- .../src/test/java/com/yscope/presto/TestClpMetadata.java | 7 ++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index fec925782d690..7c1f4e77af870 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -55,11 +55,9 @@ public List listSchemaNames(ConnectorSession session) @Override public List listTables(ConnectorSession session, Optional schemaName) { - ImmutableList.Builder builder = ImmutableList.builder(); - for (String tableName : clpClient.listTables()) { - builder.add(new SchemaTableName("default", tableName)); - } - return builder.build(); + return clpClient.listTables().stream() + .map(tableName -> new SchemaTableName("default", tableName)) + .collect(ImmutableList.toImmutableList()); } @Override diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 261da970c0f01..770e819000a92 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ 
b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -54,9 +54,10 @@ public void testListSchemaNames() @Test public void testListTables() { - assertEquals(metadata.listTables(SESSION, Optional.empty()), - ImmutableList.of(new SchemaTableName("default", "test_1_table"), - new SchemaTableName("default", "test_2_table"))); + HashSet tables = new HashSet<>(); + tables.add(new SchemaTableName("default", "test_1_table")); + tables.add(new SchemaTableName("default", "test_2_table")); + assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.empty())), tables); } @Test From 93570fe82147563fd3692a83badf29e96a9d3802 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 11 Jun 2024 10:58:28 -0400 Subject: [PATCH 017/126] remove unused dependency --- presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java | 1 - 1 file changed, 1 deletion(-) diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 770e819000a92..bcb63a38b29f9 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -24,7 +24,6 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import java.util.ArrayList; import java.util.HashSet; import java.util.Optional; From 856a53f0f8d12a2cf0ec6f71850d15d372226641 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 11 Jun 2024 12:00:37 -0400 Subject: [PATCH 018/126] fix bugs to make it work --- .../src/main/java/com/yscope/presto/ClpColumnHandle.java | 2 ++ .../src/main/java/com/yscope/presto/ClpRecordCursor.java | 2 +- .../src/main/java/com/yscope/presto/ClpSplitManager.java | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java index 5ad39ece53ebf..177506cfd1e52 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java @@ -16,6 +16,7 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; @@ -29,6 +30,7 @@ public class ClpColumnHandle private final Type columnType; private final boolean nullable; + @JsonCreator public ClpColumnHandle( @JsonProperty("columnName") String columnName, @JsonProperty("columnType") Type columnType, diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index 338e6c0371317..ce4a420413466 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -86,7 +86,7 @@ public boolean advanceNextPosition() return false; } - return false; + return true; } private void checkFieldType(int field, Type expected) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index 1fb26c22970b1..744c8fb1d0cb6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -21,6 +21,8 @@ import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import javax.inject.Inject; + import java.util.Collections; import java.util.List; @@ -29,6 +31,7 @@ public class ClpSplitManager { private final ClpClient clpClient; + @Inject public ClpSplitManager(ClpClient clpClient) { this.clpClient = clpClient; From 4165e1b4bd8b42ffc355357cb405ec3cb9071152 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 11 Jun 2024 13:28:34 -0400 
Subject: [PATCH 019/126] modify tests --- .../java/com/yscope/presto/ClpClient.java | 1 + .../com/yscope/presto/TestClpMetadata.java | 27 +++------- .../yscope/presto/TestClpRecordCursor.java | 50 ++++++++++++++++-- .../log.dict | Bin 39 -> 0 bytes .../schema_ids | Bin 67 -> 0 bytes .../schema_tree | Bin 78 -> 0 bytes .../table_metadata | Bin 55 -> 0 bytes .../tables | Bin 102 -> 0 bytes .../var.dict | Bin 54 -> 0 bytes .../array.dict | Bin .../log.dict | Bin 0 -> 8 bytes .../schema_ids | Bin 0 -> 40 bytes .../schema_tree | Bin 0 -> 76 bytes .../table_metadata | Bin 0 -> 29 bytes .../tables | Bin 0 -> 157 bytes .../timestamp.dict | Bin .../var.dict | Bin 0 -> 250 bytes presto-clp/src/test/resources/logs/test_2 | 17 ++++-- 18 files changed, 67 insertions(+), 28 deletions(-) delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/log.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_ids delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_tree delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/table_metadata delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/tables delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/var.dict rename presto-clp/src/test/resources/clp_archive/test_2_table/{74d74cae-adc3-4ab8-aab0-3aeb5104740b => a9a3ba79-44d6-4eef-995a-c4a0dfa64f66}/array.dict (100%) create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/log.dict create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_ids create mode 100644 
presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_tree create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/table_metadata create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/tables rename presto-clp/src/test/resources/clp_archive/test_2_table/{74d74cae-adc3-4ab8-aab0-3aeb5104740b => a9a3ba79-44d6-4eef-995a-c4a0dfa64f66}/timestamp.dict (100%) create mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/var.dict diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 1ac49832fe382..d3575c7d5dd36 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -232,6 +232,7 @@ public BufferedReader getRecords(String tableName) if (!DecompressRecords(tableName)) { return null; } + log.info("Decompress records to %s", decompressFile.toString()); } try { diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index bcb63a38b29f9..a2d54553fd02c 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -108,45 +108,30 @@ public void testGetTable2Metadata() ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); HashSet columnMetadata = new HashSet<>(); columnMetadata.add(ColumnMetadata.builder() - .setName("a_double") - .setType(DoubleType.DOUBLE) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("a_varchar") - .setType(VarcharType.VARCHAR) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("b_integer") + .setName("id") 
.setType(IntegerType.INTEGER) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("b_varchar") + .setName("name") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c") - .setType(BooleanType.BOOLEAN) + .setName("age") + .setType(IntegerType.INTEGER) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("b.c") + .setName("city") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c.d") + .setName("state") .setType(VarcharType.VARCHAR) .setNullable(true) .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("c.e") - .setType(BooleanType.BOOLEAN) - .setNullable(true) - .build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 7416073ac4a24..cb8bac554acc4 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -18,9 +18,11 @@ import org.testng.annotations.Test; import java.util.ArrayList; +import java.util.List; import static com.facebook.presto.testing.TestingConnectorSession.SESSION; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; @@ -39,8 +41,15 @@ public void setUp() clpClient.start(); } + public void assertNull(ClpRecordCursor cursor, List indices) + { + for (int index : indices) { + assertTrue(cursor.isNull(index)); + } + } + @Test - public void testRecordCursor() + public void testTable1RecordCursor() { ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); ClpRecordSet recordSet = (ClpRecordSet) 
recordSetProvider.getRecordSet( @@ -51,12 +60,47 @@ public void testRecordCursor() assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); assertNotNull(cursor, "cursor is null"); - cursor.advanceNextPosition(); + assertTrue(cursor.advanceNextPosition()); assertEquals(cursor.getLong(0), 1); assertEquals(cursor.getDouble(2), 2.0); assertTrue(cursor.getBoolean(5)); assertEquals(cursor.getSlice(6).toStringUtf8(), "Hello world"); - cursor.advanceNextPosition(); + assertNull(cursor, List.of(1, 3, 4)); + assertTrue(cursor.advanceNextPosition()); + assertEquals(cursor.getLong(0), 2); + assertEquals(cursor.getDouble(2), 3.0); + assertFalse(cursor.getBoolean(5)); + assertEquals(cursor.getSlice(6).toStringUtf8(), "Goodbye world"); + assertNull(cursor, List.of(1, 3, 4)); + assertTrue(cursor.advanceNextPosition()); + assertEquals(cursor.getSlice(1).toStringUtf8(), "foo"); + assertEquals(cursor.getSlice(3).toStringUtf8(), "bar"); + assertEquals(cursor.getDouble(4), 2.0); + assertNull(cursor, List.of(0, 2, 5, 6)); + assertTrue(cursor.advanceNextPosition()); + assertEquals(cursor.getSlice(1).toStringUtf8(), "baz"); + assertEquals(cursor.getSlice(3).toStringUtf8(), "qux"); + assertNull(cursor, List.of(0, 2, 4, 5, 6)); + assertFalse(cursor.advanceNextPosition()); + } + + @Test + public void testTable2RecordCursor() + { + ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); + ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( + ClpTransactionHandle.INSTANCE, + SESSION, + new ClpSplit("default", "test_2_table"), + new ArrayList<>(clpClient.listColumns("test_2_table"))); + assertNotNull(recordSet, "recordSet is null"); + ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); + assertNotNull(cursor, "cursor is null"); + for (int i = 0; i <= 12; i++) { + assertTrue(cursor.advanceNextPosition()); + assertEquals(cursor.getLong(3), i); + } + assertFalse(cursor.advanceNextPosition()); 
} @AfterMethod diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/log.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/log.dict deleted file mode 100644 index d9e073f455a13474d299a33f03688e771165724e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 39 kcmZQ%fB=oH`hOWBHZm~qK?QP4b4oG`a#9t_^NUi70fI;f-2eap diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_ids b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_ids deleted file mode 100644 index 52f098ca733b93ba4d0d32b5ee9722ff2e884618..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 67 zcmdPcs{fZE;wmG92onnf6C(oy12YRF8<1jW<^YnMK+FYVaDzGC3JI%g6z(tGv@T`a O=_J+HdYgH-FaiKg3JsJ1 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_tree b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/schema_tree deleted file mode 100644 index 1b79f476c5adee39347dc04e806d3e02d07551c8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 78 zcmdPcs{fZELYIl5go%gYKM*jmG5`T1Ln31mD^oHvLlQ$WD{BgKDhop*b5b%(5)&^& fgL8qw0ciu72fPw|AJ}@B+qzFmOGu~KGUxyR4JH$s diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/table_metadata b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/table_metadata deleted file mode 100644 index 2788e97d9953cfd1ed5b998649b89c188b7414e9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 55 zcmdPcs{fZEqLh(g0}BHqg8&1gq$HEIAake$0|an0C~zCF8!<;VSWb{U@#%7mL4txI G&jtV(KnrmI diff --git 
a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/tables b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/tables deleted file mode 100644 index 5b0c0a51c215ffa5415fce40eba0d4cdf0c6f3e9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 102 zcmdPcs{fZEqLG1No%_QokvJy@1}M+~OPpk2(7&s~)_B#@K?o|rh^nHKfq@As!VFUZ R(d){_A;65s#; diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/var.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/var.dict deleted file mode 100644 index e246a7ea76f8b3d84055d9ef42c41b010111c2f8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 54 zcmZQ&fB=oH`hOWBbQu|Turhe0=H%qFGL+{R<)kn(q~+%)B^Fh&G9>Ve8yzr6n8BF9 F2mnPg4R8Pe diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/array.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/array.dict similarity index 100% rename from presto-clp/src/test/resources/clp_archive/test_2_table/74d74cae-adc3-4ab8-aab0-3aeb5104740b/array.dict rename to presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/array.dict diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/log.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/log.dict new file mode 100644 index 0000000000000000000000000000000000000000..1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e GIT binary patch literal 8 KcmZQzfB*mh2mk>9 literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_ids b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_ids new 
file mode 100644 index 0000000000000000000000000000000000000000..ab77a0ddfa1bf93214014ae46b344a023c0b4b6d GIT binary patch literal 40 ocmdPcs{fZE;x7Zk4n_u6Mg|53CLm@8Viq7~1@m|7m^H8h0GS&F&;S4c literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_tree b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_tree new file mode 100644 index 0000000000000000000000000000000000000000..3fa58b754f501b28944778ec8a7a0367f575e36e GIT binary patch literal 76 zcmdPcs{fZELY9f)3KJW{e;{CBWdH&uhRhTO7KXgU+*D=|9|#iDQ literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/tables b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/tables new file mode 100644 index 0000000000000000000000000000000000000000..77cfca5fed993e6fab8e42573775c100168de7bf GIT binary patch literal 157 zcmdPcs{fZEVkrxQ5UT*6!MzGz1=HWFc?0r=XEOb_cZt&yWp{e8u|$qfTDXnzfNqM) zVy`znIweByFRdtU)Zj=6K79Y3S@l}K#Z%8eW42XzP|h6rDRr7^d>VpU6-*9J#*JOLgAC_^5;KZHhBwL}u>N^& Aga7~l literal 0 HcmV?d00001 diff --git a/presto-clp/src/test/resources/logs/test_2 b/presto-clp/src/test/resources/logs/test_2 index a011c4b41301f..a8c036b602f0e 100644 --- a/presto-clp/src/test/resources/logs/test_2 +++ b/presto-clp/src/test/resources/logs/test_2 @@ -1,4 +1,13 @@ -{"a": 36.735, "b": {"c": "Hello"}} -{"a": 25.834, "b": 18, "c": {"d": "world", "e": false}} -{"a": "foo", "b": "bar", "c": true} -{"a": "baz", "b": "multiple words"} \ No newline at end of file +{"id": 0, "name": "John", "age": 32, "city": "Charlotte", "state": "NC"} +{"id": 1, "name": "Jane", "age": 25, "city": "Chicago", "state": "IL"} +{"id": 2, "name": "Doe", "age": 43, "city": "Nashville", "state": "TN"} +{"id": 3, "name": "Jack", "age": 29, "city": "Columbus", "state": "OH"} +{"id": 4, "name": "Jill", "age": 35, "city": "Seattle", "state": "WA"} 
+{"id": 5, "name": "Joe", "age": 38, "city": "Boston", "state": "MA"} +{"id": 6, "name": "Jenny", "age": 27, "city": "Miami", "state": "FL"} +{"id": 7, "name": "Jim", "age": 31, "city": "Denver", "state": "CO"} +{"id": 8, "name": "Judy", "age": 40, "city": "Houston", "state": "TX"} +{"id": 9, "name": "Jerry", "age": 33, "city": "Philadelphia", "state": "PA"} +{"id": 10, "name": "Jesse", "age": 30, "city": "Phoenix", "state": "AZ"} +{"id": 11, "name": "Jasmine", "age": 28, "city": "Austin", "state": "TX"} +{"id": 12, "name": "Jared", "age": 26, "city": "Portland", "state": "OR"} \ No newline at end of file From a2fbc13b0fc528733a1327a7d21748c61f6af9b1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 14 Jun 2024 17:34:15 -0400 Subject: [PATCH 020/126] fix bugs --- .../main/java/com/yscope/presto/ClpClient.java | 6 ++++-- .../com/yscope/presto/ClpRecordCursor.java | 18 ++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index d3575c7d5dd36..72ed93ae84a1d 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -14,9 +14,9 @@ package com.yscope.presto; import com.facebook.airlift.log.Logger; +import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; -import com.facebook.presto.common.type.IntegerType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.github.luben.zstd.ZstdInputStream; @@ -294,7 +294,7 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) Type prestoType = null; switch (nodeType) { case Integer: - prestoType = IntegerType.INTEGER; + prestoType = BigintType.BIGINT; break; case Float: prestoType = DoubleType.DOUBLE; @@ -302,6 +302,8 @@ private Set parseSchemaTreeFile(Path 
schemaMapsFile) case ClpString: case VarString: case DateString: + case UnstructuredArray: + case NullValue: prestoType = VarcharType.VARCHAR; break; case Boolean: diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index ce4a420413466..da12db9501dde 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -26,9 +26,9 @@ import java.util.List; import java.util.Map; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.DoubleType.DOUBLE; -import static com.facebook.presto.common.type.IntegerType.INTEGER; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType; import static com.google.common.base.Preconditions.checkArgument; @@ -105,7 +105,7 @@ public boolean getBoolean(int field) @Override public long getLong(int field) { - checkFieldType(field, INTEGER); + checkFieldType(field, BIGINT); return fields.get(field).asLong(); } @@ -120,7 +120,13 @@ public double getDouble(int field) public Slice getSlice(int field) { checkFieldType(field, createUnboundedVarcharType()); - return Slices.utf8Slice(fields.get(field).asText()); + JsonNode node = fields.get(field); + if (node.isArray()) { + return Slices.utf8Slice(node.toString()); + } + else { + return Slices.utf8Slice(node.asText()); + } } @Override @@ -132,7 +138,7 @@ public Object getObject(int field) @Override public boolean isNull(int field) { - return fields.get(field) == null; + return fields.get(field) == null || fields.get(field).isNull(); } @Override @@ -163,7 +169,7 @@ private void parseLine(JsonNode node, String prefix) private String jsonNodeToTypeString(JsonNode node) { if (node.isIntegralNumber()) { - return 
INTEGER.getDisplayName(); + return BIGINT.getDisplayName(); } if (node.isFloatingPointNumber()) { return DOUBLE.getDisplayName(); @@ -171,7 +177,7 @@ private String jsonNodeToTypeString(JsonNode node) if (node.isBoolean()) { return BOOLEAN.getDisplayName(); } - if (node.isTextual()) { + if (node.isTextual() || node.isArray() || node.isNull()) { return VARCHAR.getDisplayName(); } return "unknown"; From 22dbb8354991af23a39ab552cc21133220425d9e Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Jun 2024 22:20:03 -0400 Subject: [PATCH 021/126] add basic query pushdown --- .../java/com/yscope/presto/ClpClient.java | 104 ++++++++++++++++-- .../com/yscope/presto/ClpPlanOptimizer.java | 65 +++++++++++ .../yscope/presto/ClpRecordSetProvider.java | 2 +- .../main/java/com/yscope/presto/ClpSplit.java | 13 ++- .../com/yscope/presto/ClpSplitManager.java | 4 +- .../com/yscope/presto/ClpTableHandle.java | 19 +++- 6 files changed, 195 insertions(+), 12 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 72ed93ae84a1d..6695fb06055b2 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -19,6 +19,9 @@ import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.SpecialFormExpression; import com.github.luben.zstd.ZstdInputStream; import com.google.common.collect.ImmutableSet; import com.yscope.presto.schema.SchemaNode; @@ -32,6 +35,7 @@ import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.nio.ByteBuffer; 
import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; @@ -47,6 +51,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import static java.util.Objects.requireNonNull; @@ -221,30 +226,113 @@ public Set listColumns(String tableName) return polymorphicColumnHandles; } - public BufferedReader getRecords(String tableName) + public BufferedReader getRecords(String tableName, Optional additionalPredicate) { if (!listTables().contains(tableName)) { return null; } - Path decompressFile = decompressDir.resolve(tableName).resolve("original"); - if (!Files.exists(decompressFile) || !Files.isRegularFile(decompressFile)) { - if (!DecompressRecords(tableName)) { + if (additionalPredicate.isPresent()) { + return searchTable(tableName, additionalPredicate.get()); + } + else { + Path decompressFile = decompressDir.resolve(tableName).resolve("original"); + if (!Files.exists(decompressFile) || !Files.isRegularFile(decompressFile)) { + if (!decompressRecords(tableName)) { + return null; + } + log.info("Decompress records to %s", decompressFile.toString()); + } + + try { + return Files.newBufferedReader(decompressFile); + } + catch (IOException e) { + log.error(e, "Failed to get records for table %s", tableName); return null; } - log.info("Decompress records to %s", decompressFile.toString()); } + } + private String buildKqlQuery(RowExpression additionalPredicate) + { + if (additionalPredicate instanceof SpecialFormExpression) { + SpecialFormExpression specialFormExpression = (SpecialFormExpression) additionalPredicate; + if (specialFormExpression.getForm() == SpecialFormExpression.Form.AND) { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + for (RowExpression argument : specialFormExpression.getArguments()) { + queryBuilder.append(buildKqlQuery(argument)); + queryBuilder.append(" AND "); + } + return queryBuilder.substring(0, queryBuilder.length() - 5) + ")"; 
+ } + else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + for (RowExpression argument : specialFormExpression.getArguments()) { + queryBuilder.append(buildKqlQuery(argument)); + queryBuilder.append(" OR "); + } + return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; + } + } + else if (additionalPredicate instanceof CallExpression) { + CallExpression callExpression = (CallExpression) additionalPredicate; + switch (callExpression.getDisplayName()) { + case "EQUAL": + return callExpression.getArguments().get(0).toString() + ": \"" + callExpression.getArguments() + .get(1) + .toString() + "\""; + case "<>": + return "NOT " + callExpression.getArguments() + .get(0) + .toString() + ": \"" + callExpression.getArguments() + .get(1) + .toString() + "\""; + case "GREATER_THAN": + return callExpression.getArguments().get(0).toString() + " > " + callExpression.getArguments() + .get(1) + .toString(); + case "GREATER_THAN_OR_EQUAL": + return callExpression.getArguments().get(0).toString() + " >= " + callExpression.getArguments() + .get(1) + .toString(); + case "LESS_THAN": + return callExpression.getArguments().get(0).toString() + " < " + callExpression.getArguments() + .get(1) + .toString(); + case "LESS_THAN_OR_EQUAL": + return callExpression.getArguments().get(0).toString() + " <= " + callExpression.getArguments() + .get(1) + .toString(); + } + } + throw new RuntimeException("Unsupported predicate type"); + } + + private BufferedReader searchTable(String tableName, RowExpression additionalPredicate) + { + Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); + String query = buildKqlQuery(additionalPredicate); + + // Spawn search process and read from stdout try { - return Files.newBufferedReader(decompressFile); + ProcessBuilder processBuilder = + new ProcessBuilder(executablePath.toString(), + "s", + tableArchiveDir.toString(), + query); 
+ Process process = processBuilder.start(); + return new BufferedReader(new InputStreamReader(process.getInputStream())); } catch (IOException e) { - log.error(e, "Failed to get records for table %s", tableName); + log.error(e, "Failed to search records for table %s", tableName); return null; } } - private boolean DecompressRecords(String tableName) + private boolean decompressRecords(String tableName) { Path tableDecompressDir = decompressDir.resolve(tableName); Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java new file mode 100644 index 0000000000000..f18bdccdd0149 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -0,0 +1,65 @@ +package com.yscope.presto; + +import com.facebook.presto.spi.ConnectorPlanOptimizer; +import com.facebook.presto.spi.ConnectorPlanRewriter; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.TableHandle; +import com.facebook.presto.spi.VariableAllocator; +import com.facebook.presto.spi.plan.FilterNode; +import com.facebook.presto.spi.plan.PlanNode; +import com.facebook.presto.spi.plan.PlanNodeIdAllocator; +import com.facebook.presto.spi.plan.TableScanNode; + +import java.util.Optional; + +import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; + +public class ClpPlanOptimizer + implements ConnectorPlanOptimizer +{ + @Override + public PlanNode optimize(PlanNode maxSubplan, + ConnectorSession session, + VariableAllocator variableAllocator, + PlanNodeIdAllocator idAllocator) + { + return rewriteWith(new Rewriter(idAllocator), maxSubplan); + } + + private class Rewriter + extends ConnectorPlanRewriter + { + private final PlanNodeIdAllocator idAllocator; + + public Rewriter(PlanNodeIdAllocator idAllocator) + { + this.idAllocator = idAllocator; + } + + @Override + public PlanNode 
visitFilter(FilterNode node, RewriteContext context) + { + if (!(node.getSource() instanceof TableScanNode)) { + return node; + } + + TableScanNode tableScanNode = (TableScanNode) node.getSource(); + TableHandle tableHandle = tableScanNode.getTable(); + ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); + + return new TableScanNode( + node.getSourceLocation(), + idAllocator.getNextId(), + new TableHandle( + tableHandle.getConnectorId(), + new ClpTableHandle(clpTableHandle.getTableName(), Optional.ofNullable(node.getPredicate())), + tableHandle.getTransaction(), + tableHandle.getLayout()), + tableScanNode.getOutputVariables(), + tableScanNode.getAssignments(), + tableScanNode.getTableConstraints(), + tableScanNode.getCurrentConstraint(), + tableScanNode.getEnforcedConstraint()); + } + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index 68f928a739fed..f55d1ccef8181 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -47,7 +47,7 @@ public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, for (ColumnHandle handle : columns) { handles.add((ClpColumnHandle) handle); } - return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName()), + return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), clpSplit.getAdditionalPredicate()), clpClient.getConfig().isPolymorphicTypeEnabled(), handles.build()); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index c04a1401efb04..fe3f66e0acc22 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -16,6 +16,7 @@ import com.facebook.presto.spi.ConnectorSplit; import 
com.facebook.presto.spi.HostAddress; import com.facebook.presto.spi.NodeProvider; +import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.schedule.NodeSelectionStrategy; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -24,6 +25,7 @@ import javax.annotation.Nullable; import java.util.List; +import java.util.Optional; import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE; @@ -32,13 +34,16 @@ public class ClpSplit { private final String schemaName; private final String tableName; + private final Optional additionalPredicate; @JsonCreator public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, - @JsonProperty("tableName") @Nullable String tableName) + @JsonProperty("tableName") @Nullable String tableName, + @JsonProperty("additionalPredicate") Optional additionalPredicate) { this.schemaName = schemaName; this.tableName = tableName; + this.additionalPredicate = additionalPredicate; } @JsonProperty @@ -54,6 +59,12 @@ public String getTableName() return tableName; } + @JsonProperty + public Optional getAdditionalPredicate() + { + return additionalPredicate; + } + @Override public NodeSelectionStrategy getNodeSelectionStrategy() { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index 744c8fb1d0cb6..ed31ea8ed141f 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -48,7 +48,9 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand if (!clpClient.listTables().contains(tableHandle.getTableName())) { throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); } - List splits = Collections.singletonList(new ClpSplit("default", tableHandle.getTableName())); + List splits = 
Collections.singletonList(new ClpSplit("default", + tableHandle.getTableName(), + tableHandle.getPredicate())); return new FixedSplitSource(splits); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index f0a179d9d49b0..cf5399711012f 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -14,20 +14,37 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.relation.RowExpression; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; +import java.util.Optional; public class ClpTableHandle implements ConnectorTableHandle { private final String tableName; + private final Optional predicate; @JsonCreator - public ClpTableHandle(@JsonProperty("tableName") String tableName) + public ClpTableHandle(@JsonProperty("tableName") String tableName, + @JsonProperty("predicate") Optional predicate) { this.tableName = tableName; + this.predicate = predicate; + } + + @JsonCreator + public ClpTableHandle(@JsonProperty("tableName") String tableName) + { + this(tableName, Optional.empty()); + } + + @JsonProperty + public Optional getPredicate() + { + return predicate; } @JsonProperty From 8c489690b3cedf29cace77760ece5619ff3f7912 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Jun 2024 22:54:45 -0400 Subject: [PATCH 022/126] add test cases --- .../java/com/yscope/presto/ClpClient.java | 46 +++++++++++-------- .../main/java/com/yscope/presto/ClpSplit.java | 7 +++ .../com/yscope/presto/TestClpMetadata.java | 9 ++-- .../yscope/presto/TestClpRecordCursor.java | 44 ++++++++++++++++++ 4 files changed, 82 insertions(+), 24 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 
6695fb06055b2..850e952424d4a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -279,33 +279,39 @@ else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { } else if (additionalPredicate instanceof CallExpression) { CallExpression callExpression = (CallExpression) additionalPredicate; + String variableName = callExpression.getArguments().get(0).toString(); + if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || + variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { + variableName = variableName.substring(0, variableName.lastIndexOf('_')); + } + String literal = callExpression.getArguments().get(1).toString(); switch (callExpression.getDisplayName()) { case "EQUAL": - return callExpression.getArguments().get(0).toString() + ": \"" + callExpression.getArguments() - .get(1) - .toString() + "\""; + if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || + callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || + callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { + return variableName + ": " + literal; + } + else { + return variableName + ": \"" + literal + "\""; + } case "<>": - return "NOT " + callExpression.getArguments() - .get(0) - .toString() + ": \"" + callExpression.getArguments() - .get(1) - .toString() + "\""; + if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || + callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || + callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { + return "NOT " + variableName + ": " + literal; + } + else { + return "NOT " + variableName + ": \"" + literal + "\""; + } case "GREATER_THAN": - return callExpression.getArguments().get(0).toString() + " > " + callExpression.getArguments() - .get(1) - .toString(); + return variableName + " > " + 
literal; case "GREATER_THAN_OR_EQUAL": - return callExpression.getArguments().get(0).toString() + " >= " + callExpression.getArguments() - .get(1) - .toString(); + return variableName + " >= " + literal; case "LESS_THAN": - return callExpression.getArguments().get(0).toString() + " < " + callExpression.getArguments() - .get(1) - .toString(); + return variableName + " < " + literal; case "LESS_THAN_OR_EQUAL": - return callExpression.getArguments().get(0).toString() + " <= " + callExpression.getArguments() - .get(1) - .toString(); + return variableName + " <= " + literal; } } throw new RuntimeException("Unsupported predicate type"); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index fe3f66e0acc22..f8f983c071db5 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -46,6 +46,13 @@ public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, this.additionalPredicate = additionalPredicate; } + @JsonCreator + public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, + @JsonProperty("tableName") @Nullable String tableName) + { + this(schemaName, tableName, Optional.empty()); + } + @JsonProperty @Nullable public String getSchemaName() diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index a2d54553fd02c..fc457f6ba9993 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -13,6 +13,7 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.IntegerType; @@ -67,8 +68,8 @@ public void testGetTable1Metadata() ConnectorTableMetadata 
tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); HashSet columnMetadata = new HashSet<>(); columnMetadata.add(ColumnMetadata.builder() - .setName("a_integer") - .setType(IntegerType.INTEGER) + .setName("a_bigint") + .setType(BigintType.BIGINT) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() @@ -109,7 +110,7 @@ public void testGetTable2Metadata() HashSet columnMetadata = new HashSet<>(); columnMetadata.add(ColumnMetadata.builder() .setName("id") - .setType(IntegerType.INTEGER) + .setType(BigintType.BIGINT) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() @@ -119,7 +120,7 @@ public void testGetTable2Metadata() .build()); columnMetadata.add(ColumnMetadata.builder() .setName("age") - .setType(IntegerType.INTEGER) + .setType(BigintType.BIGINT) .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index cb8bac554acc4..2894b0c602aa3 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -13,13 +13,25 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.IntegerType; +import com.facebook.presto.metadata.FunctionAndTypeManager; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.google.common.collect.ImmutableList; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import java.util.ArrayList; import java.util.List; +import java.util.Optional; +import static com.facebook.presto.common.function.OperatorType.EQUAL; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static 
com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; +import static com.facebook.presto.sql.relational.Expressions.constant; import static com.facebook.presto.testing.TestingConnectorSession.SESSION; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -103,6 +115,38 @@ public void testTable2RecordCursor() assertFalse(cursor.advanceNextPosition()); } + @Test + public void testPredicate() + { + FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager(); + CallExpression callExpression = + new CallExpression(EQUAL.name(), + functionAndTypeManager.resolveOperator(EQUAL, fromTypes( + BigintType.BIGINT, BigintType.BIGINT)), + BOOLEAN, + ImmutableList.of(new VariableReferenceExpression(Optional.empty(), + "a_bigint", + BigintType.BIGINT), + constant(1L, BigintType.BIGINT))); + + ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); + ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( + ClpTransactionHandle.INSTANCE, + SESSION, + new ClpSplit("default", "test_1_table", Optional.of(callExpression)), + new ArrayList<>(clpClient.listColumns("test_1_table"))); + assertNotNull(recordSet, "recordSet is null"); + ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); + assertNotNull(cursor, "cursor is null"); + assertTrue(cursor.advanceNextPosition()); + assertEquals(cursor.getLong(0), 1); + assertEquals(cursor.getDouble(2), 2.0); + assertTrue(cursor.getBoolean(5)); + assertEquals(cursor.getSlice(6).toStringUtf8(), "Hello world"); + assertNull(cursor, List.of(1, 3, 4)); + assertFalse(cursor.advanceNextPosition()); + } + @AfterMethod public void tearDown() { From a408e54f1403f0b78d0be5ff5fac6ad84c800ac6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Jun 2024 23:35:10 -0400 Subject: [PATCH 023/126] fix build error --- presto-clp/pom.xml | 7 +++ 
.../java/com/yscope/presto/ClpConnector.java | 7 +++ .../java/com/yscope/presto/ClpMetadata.java | 5 ++- .../com/yscope/presto/ClpPlanOptimizer.java | 13 ++++++ .../presto/ClpPlanOptimizerProvider.java | 43 +++++++++++++++++++ .../main/java/com/yscope/presto/ClpSplit.java | 7 --- .../com/yscope/presto/ClpTableHandle.java | 6 --- .../com/yscope/presto/TestClpMetadata.java | 1 - .../yscope/presto/TestClpRecordCursor.java | 5 +-- 9 files changed, 75 insertions(+), 19 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 749ef6ad3d527..1f1af2d38d99b 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -117,5 +117,12 @@ presto-main test + + + com.facebook.presto + presto-analyzer + test + + diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java index 7b8111c791714..cdd9d1ad03d7d 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java @@ -17,6 +17,7 @@ import com.facebook.airlift.log.Logger; import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorMetadata; +import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; @@ -48,6 +49,12 @@ public ClpConnector(LifeCycleManager lifeCycleManager, this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); } + @Override + public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() + { + return new ClpPlanOptimizerProvider(); + } + @Override public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly) { diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 7c1f4e77af870..26b1e3bf28f1d 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -71,7 +71,7 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable return null; } - return new ClpTableHandle(tableName.getTableName()); + return new ClpTableHandle(tableName.getTableName(), Optional.empty()); } @Override @@ -110,7 +110,8 @@ public Map> listTableColumns(ConnectorSess return clpClient.listTables().stream() .collect(ImmutableMap.toImmutableMap( tableName -> new SchemaTableName("default", tableName), - tableName -> getTableMetadata(session, new ClpTableHandle(tableName)).getColumns())); + tableName -> getTableMetadata(session, + new ClpTableHandle(tableName, Optional.empty())).getColumns())); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index f18bdccdd0149..fa140eee7a63c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -1,3 +1,16 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto; import com.facebook.presto.spi.ConnectorPlanOptimizer; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java new file mode 100644 index 0000000000000..69ae0f9d25f4e --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java @@ -0,0 +1,43 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.presto.spi.ConnectorPlanOptimizer; +import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; +import com.google.common.collect.ImmutableSet; + +import javax.inject.Inject; + +import java.util.Set; + +public class ClpPlanOptimizerProvider + implements ConnectorPlanOptimizerProvider +{ + @Inject + public ClpPlanOptimizerProvider() + { + } + + @Override + public Set getLogicalPlanOptimizers() + { + return ImmutableSet.of(); + } + + @Override + public Set getPhysicalPlanOptimizers() + { + return ImmutableSet.of(new ClpPlanOptimizer()); + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index f8f983c071db5..fe3f66e0acc22 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -46,13 +46,6 @@ public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, 
this.additionalPredicate = additionalPredicate; } - @JsonCreator - public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, - @JsonProperty("tableName") @Nullable String tableName) - { - this(schemaName, tableName, Optional.empty()); - } - @JsonProperty @Nullable public String getSchemaName() diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index cf5399711012f..014d30b9b287a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -35,12 +35,6 @@ public ClpTableHandle(@JsonProperty("tableName") String tableName, this.predicate = predicate; } - @JsonCreator - public ClpTableHandle(@JsonProperty("tableName") String tableName) - { - this(tableName, Optional.empty()); - } - @JsonProperty public Optional getPredicate() { diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index fc457f6ba9993..da955c500d8d0 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -16,7 +16,6 @@ import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; -import com.facebook.presto.common.type.IntegerType; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorTableMetadata; diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 2894b0c602aa3..4c1573be27f77 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -14,7 +14,6 @@ package 
com.yscope.presto; import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.common.type.IntegerType; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; @@ -67,7 +66,7 @@ public void testTable1RecordCursor() ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_1_table"), + new ClpSplit("default", "test_1_table", Optional.empty()), new ArrayList<>(clpClient.listColumns("test_1_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); @@ -103,7 +102,7 @@ public void testTable2RecordCursor() ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_2_table"), + new ClpSplit("default", "test_2_table", Optional.empty()), new ArrayList<>(clpClient.listColumns("test_2_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); From 00e3a6236f89ff221b120fc20efe647dcbb0a045 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Jun 2024 23:58:32 -0400 Subject: [PATCH 024/126] fix a bug --- .../java/com/yscope/presto/ClpClient.java | 77 +------------------ .../com/yscope/presto/ClpPlanOptimizer.java | 74 +++++++++++++++++- .../yscope/presto/ClpRecordSetProvider.java | 2 +- .../main/java/com/yscope/presto/ClpSplit.java | 10 +-- .../com/yscope/presto/ClpSplitManager.java | 2 +- .../com/yscope/presto/ClpTableHandle.java | 10 +-- .../yscope/presto/TestClpRecordCursor.java | 4 +- 7 files changed, 90 insertions(+), 89 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 850e952424d4a..d6a16cc96001b 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -19,9 +19,6 @@ import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; -import com.facebook.presto.spi.relation.CallExpression; -import com.facebook.presto.spi.relation.RowExpression; -import com.facebook.presto.spi.relation.SpecialFormExpression; import com.github.luben.zstd.ZstdInputStream; import com.google.common.collect.ImmutableSet; import com.yscope.presto.schema.SchemaNode; @@ -226,14 +223,14 @@ public Set listColumns(String tableName) return polymorphicColumnHandles; } - public BufferedReader getRecords(String tableName, Optional additionalPredicate) + public BufferedReader getRecords(String tableName, Optional query) { if (!listTables().contains(tableName)) { return null; } - if (additionalPredicate.isPresent()) { - return searchTable(tableName, additionalPredicate.get()); + if (query.isPresent()) { + return searchTable(tableName, query.get()); } else { Path decompressFile = decompressDir.resolve(tableName).resolve("original"); @@ -254,75 +251,9 @@ public BufferedReader getRecords(String tableName, Optional addit } } - private String buildKqlQuery(RowExpression additionalPredicate) - { - if (additionalPredicate instanceof SpecialFormExpression) { - SpecialFormExpression specialFormExpression = (SpecialFormExpression) additionalPredicate; - if (specialFormExpression.getForm() == SpecialFormExpression.Form.AND) { - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); - for (RowExpression argument : specialFormExpression.getArguments()) { - queryBuilder.append(buildKqlQuery(argument)); - queryBuilder.append(" AND "); - } - return queryBuilder.substring(0, queryBuilder.length() - 5) + ")"; - } - else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { - StringBuilder queryBuilder = new 
StringBuilder(); - queryBuilder.append("("); - for (RowExpression argument : specialFormExpression.getArguments()) { - queryBuilder.append(buildKqlQuery(argument)); - queryBuilder.append(" OR "); - } - return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; - } - } - else if (additionalPredicate instanceof CallExpression) { - CallExpression callExpression = (CallExpression) additionalPredicate; - String variableName = callExpression.getArguments().get(0).toString(); - if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || - variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { - variableName = variableName.substring(0, variableName.lastIndexOf('_')); - } - String literal = callExpression.getArguments().get(1).toString(); - switch (callExpression.getDisplayName()) { - case "EQUAL": - if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || - callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || - callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { - return variableName + ": " + literal; - } - else { - return variableName + ": \"" + literal + "\""; - } - case "<>": - if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || - callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || - callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { - return "NOT " + variableName + ": " + literal; - } - else { - return "NOT " + variableName + ": \"" + literal + "\""; - } - case "GREATER_THAN": - return variableName + " > " + literal; - case "GREATER_THAN_OR_EQUAL": - return variableName + " >= " + literal; - case "LESS_THAN": - return variableName + " < " + literal; - case "LESS_THAN_OR_EQUAL": - return variableName + " <= " + literal; - } - } - throw new RuntimeException("Unsupported predicate type"); - } - - private BufferedReader searchTable(String tableName, RowExpression 
additionalPredicate) + private BufferedReader searchTable(String tableName, String query) { Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); - String query = buildKqlQuery(additionalPredicate); - - // Spawn search process and read from stdout try { ProcessBuilder processBuilder = new ProcessBuilder(executablePath.toString(), diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index fa140eee7a63c..1bb09af079ac7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -13,6 +13,9 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; @@ -22,6 +25,9 @@ import com.facebook.presto.spi.plan.PlanNode; import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.TableScanNode; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.SpecialFormExpression; import java.util.Optional; @@ -39,7 +45,70 @@ public PlanNode optimize(PlanNode maxSubplan, return rewriteWith(new Rewriter(idAllocator), maxSubplan); } - private class Rewriter + public static String buildKqlQuery(RowExpression additionalPredicate) + { + if (additionalPredicate instanceof SpecialFormExpression) { + SpecialFormExpression specialFormExpression = (SpecialFormExpression) additionalPredicate; + if (specialFormExpression.getForm() == SpecialFormExpression.Form.AND) { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + for (RowExpression argument : 
specialFormExpression.getArguments()) { + queryBuilder.append(buildKqlQuery(argument)); + queryBuilder.append(" AND "); + } + return queryBuilder.substring(0, queryBuilder.length() - 5) + ")"; + } + else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + for (RowExpression argument : specialFormExpression.getArguments()) { + queryBuilder.append(buildKqlQuery(argument)); + queryBuilder.append(" OR "); + } + return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; + } + } + else if (additionalPredicate instanceof CallExpression) { + CallExpression callExpression = (CallExpression) additionalPredicate; + String variableName = callExpression.getArguments().get(0).toString(); + if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || + variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { + variableName = variableName.substring(0, variableName.lastIndexOf('_')); + } + String literal = callExpression.getArguments().get(1).toString(); + switch (callExpression.getDisplayName()) { + case "EQUAL": + if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || + callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || + callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { + return variableName + ": " + literal; + } + else { + return variableName + ": \"" + literal + "\""; + } + case "<>": + if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || + callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || + callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { + return "NOT " + variableName + ": " + literal; + } + else { + return "NOT " + variableName + ": \"" + literal + "\""; + } + case "GREATER_THAN": + return variableName + " > " + literal; + case "GREATER_THAN_OR_EQUAL": + return variableName + " 
>= " + literal; + case "LESS_THAN": + return variableName + " < " + literal; + case "LESS_THAN_OR_EQUAL": + return variableName + " <= " + literal; + } + } + throw new RuntimeException("Unsupported predicate type"); + } + + private static class Rewriter extends ConnectorPlanRewriter { private final PlanNodeIdAllocator idAllocator; @@ -65,7 +134,8 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) idAllocator.getNextId(), new TableHandle( tableHandle.getConnectorId(), - new ClpTableHandle(clpTableHandle.getTableName(), Optional.ofNullable(node.getPredicate())), + new ClpTableHandle(clpTableHandle.getTableName(), + Optional.of(buildKqlQuery(node.getPredicate()))), tableHandle.getTransaction(), tableHandle.getLayout()), tableScanNode.getOutputVariables(), diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index f55d1ccef8181..dde555784d8f3 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -47,7 +47,7 @@ public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, for (ColumnHandle handle : columns) { handles.add((ClpColumnHandle) handle); } - return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), clpSplit.getAdditionalPredicate()), + return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), clpSplit.getQuery()), clpClient.getConfig().isPolymorphicTypeEnabled(), handles.build()); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index fe3f66e0acc22..af63a0554412b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -34,16 +34,16 @@ public class ClpSplit { private final String schemaName; private final String tableName; - private final 
Optional additionalPredicate; + private final Optional query; @JsonCreator public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, @JsonProperty("tableName") @Nullable String tableName, - @JsonProperty("additionalPredicate") Optional additionalPredicate) + @JsonProperty("query") Optional query) { this.schemaName = schemaName; this.tableName = tableName; - this.additionalPredicate = additionalPredicate; + this.query = query; } @JsonProperty @@ -60,9 +60,9 @@ public String getTableName() } @JsonProperty - public Optional getAdditionalPredicate() + public Optional getQuery() { - return additionalPredicate; + return query; } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index ed31ea8ed141f..ad8dc67606236 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -50,7 +50,7 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand } List splits = Collections.singletonList(new ClpSplit("default", tableHandle.getTableName(), - tableHandle.getPredicate())); + tableHandle.getQuery())); return new FixedSplitSource(splits); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index 014d30b9b287a..397b69a854765 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -25,20 +25,20 @@ public class ClpTableHandle implements ConnectorTableHandle { private final String tableName; - private final Optional predicate; + private final Optional query; @JsonCreator public ClpTableHandle(@JsonProperty("tableName") String tableName, - @JsonProperty("predicate") Optional predicate) + @JsonProperty("query") Optional query) { this.tableName = tableName; - this.predicate = 
predicate; + this.query = query; } @JsonProperty - public Optional getPredicate() + public Optional getQuery() { - return predicate; + return query; } @JsonProperty diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 4c1573be27f77..2917e01007cfa 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -127,12 +127,12 @@ public void testPredicate() "a_bigint", BigintType.BIGINT), constant(1L, BigintType.BIGINT))); - + String query = ClpPlanOptimizer.buildKqlQuery(callExpression); ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_1_table", Optional.of(callExpression)), + new ClpSplit("default", "test_1_table", Optional.of(query)), new ArrayList<>(clpClient.listColumns("test_1_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); From c88055cf8a040e26471256202da3c8f26bf4dca1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 26 Jun 2024 00:00:53 -0400 Subject: [PATCH 025/126] remove unused import --- presto-clp/src/main/java/com/yscope/presto/ClpSplit.java | 1 - presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java | 1 - 2 files changed, 2 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index af63a0554412b..5b9778c8caf63 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -16,7 +16,6 @@ import com.facebook.presto.spi.ConnectorSplit; import com.facebook.presto.spi.HostAddress; import com.facebook.presto.spi.NodeProvider; -import 
com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.schedule.NodeSelectionStrategy; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index 397b69a854765..e1c218ffc7146 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -14,7 +14,6 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorTableHandle; -import com.facebook.presto.spi.relation.RowExpression; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; From fbd02c72ac928742713b150a31f5afaff6cd0b41 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 26 Jun 2024 00:38:19 -0400 Subject: [PATCH 026/126] get a functional pushdown impl --- .../java/com/yscope/presto/ClpMetadata.java | 6 ++--- .../com/yscope/presto/ClpPlanOptimizer.java | 25 +++++++++---------- .../com/yscope/presto/ClpSplitManager.java | 5 ++-- .../com/yscope/presto/ClpTableHandle.java | 12 +-------- .../yscope/presto/ClpTableLayoutHandle.java | 12 ++++++++- 5 files changed, 29 insertions(+), 31 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 26b1e3bf28f1d..e73983a51ba17 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -71,7 +71,7 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable return null; } - return new ClpTableHandle(tableName.getTableName(), Optional.empty()); + return new ClpTableHandle(tableName.getTableName()); } @Override @@ -81,7 +81,7 @@ public List getTableLayouts(ConnectorSession session Optional> desiredColumns) { 
ClpTableHandle tableHandle = (ClpTableHandle) table; - ConnectorTableLayout layout = new ConnectorTableLayout(new ClpTableLayoutHandle(tableHandle)); + ConnectorTableLayout layout = new ConnectorTableLayout(new ClpTableLayoutHandle(tableHandle, Optional.empty())); return ImmutableList.of(new ConnectorTableLayoutResult(layout, constraint.getSummary())); } @@ -111,7 +111,7 @@ public Map> listTableColumns(ConnectorSess .collect(ImmutableMap.toImmutableMap( tableName -> new SchemaTableName("default", tableName), tableName -> getTableMetadata(session, - new ClpTableHandle(tableName, Optional.empty())).getColumns())); + new ClpTableHandle(tableName)).getColumns())); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 1bb09af079ac7..2e811e1919dd7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -36,15 +36,6 @@ public class ClpPlanOptimizer implements ConnectorPlanOptimizer { - @Override - public PlanNode optimize(PlanNode maxSubplan, - ConnectorSession session, - VariableAllocator variableAllocator, - PlanNodeIdAllocator idAllocator) - { - return rewriteWith(new Rewriter(idAllocator), maxSubplan); - } - public static String buildKqlQuery(RowExpression additionalPredicate) { if (additionalPredicate instanceof SpecialFormExpression) { @@ -108,6 +99,15 @@ else if (additionalPredicate instanceof CallExpression) { throw new RuntimeException("Unsupported predicate type"); } + @Override + public PlanNode optimize(PlanNode maxSubplan, + ConnectorSession session, + VariableAllocator variableAllocator, + PlanNodeIdAllocator idAllocator) + { + return rewriteWith(new Rewriter(idAllocator), maxSubplan); + } + private static class Rewriter extends ConnectorPlanRewriter { @@ -128,16 +128,15 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) TableScanNode 
tableScanNode = (TableScanNode) node.getSource(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - + ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, Optional.of(buildKqlQuery(node.getPredicate()))); return new TableScanNode( node.getSourceLocation(), idAllocator.getNextId(), new TableHandle( tableHandle.getConnectorId(), - new ClpTableHandle(clpTableHandle.getTableName(), - Optional.of(buildKqlQuery(node.getPredicate()))), + clpTableHandle, tableHandle.getTransaction(), - tableHandle.getLayout()), + Optional.of(clpTableLayoutHandle)), tableScanNode.getOutputVariables(), tableScanNode.getAssignments(), tableScanNode.getTableConstraints(), diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index ad8dc67606236..dc8fbcc25ed15 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -48,9 +48,8 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand if (!clpClient.listTables().contains(tableHandle.getTableName())) { throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); } - List splits = Collections.singletonList(new ClpSplit("default", - tableHandle.getTableName(), - tableHandle.getQuery())); + List splits = + Collections.singletonList(new ClpSplit("default", tableHandle.getTableName(), layoutHandle.getQuery())); return new FixedSplitSource(splits); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index e1c218ffc7146..f0a179d9d49b0 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -18,26 +18,16 @@ import 
com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; -import java.util.Optional; public class ClpTableHandle implements ConnectorTableHandle { private final String tableName; - private final Optional query; @JsonCreator - public ClpTableHandle(@JsonProperty("tableName") String tableName, - @JsonProperty("query") Optional query) + public ClpTableHandle(@JsonProperty("tableName") String tableName) { this.tableName = tableName; - this.query = query; - } - - @JsonProperty - public Optional getQuery() - { - return query; } @JsonProperty diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java index 9a5f51b4e1b68..77ae589cdbf6c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java @@ -18,16 +18,20 @@ import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; +import java.util.Optional; public class ClpTableLayoutHandle implements ConnectorTableLayoutHandle { private final ClpTableHandle table; + private final Optional query; @JsonCreator - public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table) + public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, + @JsonProperty("query") Optional query) { this.table = table; + this.query = query; } @JsonProperty @@ -36,6 +40,12 @@ public ClpTableHandle getTable() return table; } + @JsonProperty + public Optional getQuery() + { + return query; + } + @Override public boolean equals(Object o) { From b7a094ffbe540b7fd1c6d2980c7086002447cbd9 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 26 Jun 2024 11:35:37 -0400 Subject: [PATCH 027/126] correctly handle string fields and nested fields --- .../com/yscope/presto/ClpPlanOptimizer.java | 83 ++++++++++++++----- .../presto/spi/VariableAllocator.java | 2 +- 2 files changed, 63 insertions(+), 22 deletions(-) diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 2e811e1919dd7..235741066d924 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -13,9 +13,11 @@ */ package com.yscope.presto; +import com.facebook.airlift.log.Logger; import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; @@ -26,8 +28,10 @@ import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.ConstantExpression; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.SpecialFormExpression; +import io.airlift.slice.Slice; import java.util.Optional; @@ -36,6 +40,25 @@ public class ClpPlanOptimizer implements ConnectorPlanOptimizer { + private static final Logger log = Logger.get(ClpPlanOptimizer.class); + + private static String getVariableName(String variableName) + { + if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || + variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { + return variableName.substring(0, variableName.lastIndexOf('_')); + } + return variableName; + } + + private static String getLiteralString(ConstantExpression literal) + { + if (literal.getValue() instanceof Slice) { + return ((Slice) literal.getValue()).toStringUtf8(); + } + return literal.toString(); + } + public static String buildKqlQuery(RowExpression additionalPredicate) { if (additionalPredicate instanceof 
SpecialFormExpression) { @@ -58,42 +81,58 @@ else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { } return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; } + else if (specialFormExpression.getForm() == SpecialFormExpression.Form.IN) { + CallExpression callExpression = (CallExpression) specialFormExpression.getArguments().get(1); + String variableName = getVariableName(specialFormExpression.getArguments().get(0).toString()); + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append(variableName); + queryBuilder.append("("); + for (RowExpression argument : callExpression.getArguments() + .subList(1, callExpression.getArguments().size())) { + ConstantExpression literal = (ConstantExpression) argument; + String literalString = getLiteralString(literal); + queryBuilder.append(variableName).append(": "); + if (literal.getType().equals(VarcharType.VARCHAR)) { + queryBuilder.append("\""); + queryBuilder.append(literalString); + queryBuilder.append("\""); + } + else { + queryBuilder.append(literalString); + } + queryBuilder.append(" OR "); + } + return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; + } } else if (additionalPredicate instanceof CallExpression) { CallExpression callExpression = (CallExpression) additionalPredicate; - String variableName = callExpression.getArguments().get(0).toString(); - if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || - variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { - variableName = variableName.substring(0, variableName.lastIndexOf('_')); - } - String literal = callExpression.getArguments().get(1).toString(); + String variableName = getVariableName(callExpression.getArguments().get(0).toString()); + ConstantExpression literal = (ConstantExpression) callExpression.getArguments().get(1); + String literalString = getLiteralString(literal); switch (callExpression.getDisplayName()) { case "EQUAL": - if 
(callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || - callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || - callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { - return variableName + ": " + literal; + if (literal.getType().equals(VarcharType.VARCHAR)) { + return variableName + ": \"" + literalString + "\""; } else { - return variableName + ": \"" + literal + "\""; + return variableName + ": " + literalString; } case "<>": - if (callExpression.getArguments().get(1).getType().equals(BigintType.BIGINT) || - callExpression.getArguments().get(1).getType().equals(DoubleType.DOUBLE) || - callExpression.getArguments().get(1).getType().equals(BooleanType.BOOLEAN)) { - return "NOT " + variableName + ": " + literal; + if (literal.getType().equals(VarcharType.VARCHAR)) { + return "NOT " + variableName + ": \"" + literalString + "\""; } else { - return "NOT " + variableName + ": \"" + literal + "\""; + return "NOT " + variableName + ": " + literalString; } case "GREATER_THAN": - return variableName + " > " + literal; + return variableName + " > " + literalString; case "GREATER_THAN_OR_EQUAL": - return variableName + " >= " + literal; + return variableName + " >= " + literalString; case "LESS_THAN": - return variableName + " < " + literal; + return variableName + " < " + literalString; case "LESS_THAN_OR_EQUAL": - return variableName + " <= " + literal; + return variableName + " <= " + literalString; } } throw new RuntimeException("Unsupported predicate type"); @@ -128,7 +167,9 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) TableScanNode tableScanNode = (TableScanNode) node.getSource(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, Optional.of(buildKqlQuery(node.getPredicate()))); + String query = 
buildKqlQuery(node.getPredicate()); + log.info("Query: " + query); + ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, Optional.of(query)); return new TableScanNode( node.getSourceLocation(), idAllocator.getNextId(), diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java b/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java index edf6d015d5ba8..a88c225d86fb5 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java @@ -32,7 +32,7 @@ public class VariableAllocator { - protected static final Pattern DISALLOWED_CHAR_PATTERN = Pattern.compile("[^a-zA-Z0-9_\\-$]+"); + protected static final Pattern DISALLOWED_CHAR_PATTERN = Pattern.compile("[^.a-zA-Z0-9_\\-$]+"); protected final Map variables; protected int nextId; From af883cfa5f1dfdb82dd6086e1b71ddc4428ad501 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 26 Jun 2024 11:39:02 -0400 Subject: [PATCH 028/126] remove unused import --- .../src/main/java/com/yscope/presto/ClpPlanOptimizer.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 235741066d924..950d63cbc1732 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -14,9 +14,6 @@ package com.yscope.presto; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.common.type.BooleanType; -import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; From 2ab5892c4525bda095565d94679554f5c3bb36af Mon Sep 17 00:00:00 2001 From: wraymo 
Date: Wed, 26 Jun 2024 12:10:22 -0400 Subject: [PATCH 029/126] add query optimizer and fix a bug --- presto-clp/pom.xml | 5 +++ .../java/com/yscope/presto/ClpConnector.java | 19 +++++++++- .../yscope/presto/ClpConnectorFactory.java | 6 +++ .../com/yscope/presto/ClpPlanOptimizer.java | 38 +++++++++++++++---- .../presto/ClpPlanOptimizerProvider.java | 23 ++++++++++- 5 files changed, 79 insertions(+), 12 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 1f1af2d38d99b..d0a4fea2ccdff 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -94,6 +94,11 @@ provided + + com.facebook.presto + presto-expressions + + io.airlift units diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java index cdd9d1ad03d7d..7d9cdaeb9c7b0 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java @@ -21,6 +21,9 @@ import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import com.facebook.presto.spi.relation.RowExpressionService; import com.facebook.presto.spi.transaction.IsolationLevel; import javax.inject.Inject; @@ -36,23 +39,35 @@ public class ClpConnector private final ClpMetadata metadata; private final ClpSplitManager splitManager; private final ClpRecordSetProvider recordSetProvider; + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final RowExpressionService rowExpressionService; @Inject public ClpConnector(LifeCycleManager lifeCycleManager, ClpMetadata metadata, ClpSplitManager splitManager, - ClpRecordSetProvider recordSetProvider) + 
ClpRecordSetProvider recordSetProvider, + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + RowExpressionService rowExpressionService) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); + this.functionManager = requireNonNull(functionManager, "functionManager is null"); + this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); + this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null"); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(); + return new ClpPlanOptimizerProvider(functionManager, + functionResolution, + rowExpressionService.getDeterminismEvaluator(), + rowExpressionService.getExpressionOptimizer()); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java index ae4fd6d09b9b2..5b9e003b093c7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java @@ -21,6 +21,9 @@ import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorContext; import com.facebook.presto.spi.connector.ConnectorFactory; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import com.facebook.presto.spi.relation.RowExpressionService; import com.google.inject.Injector; import java.util.Map; @@ -51,6 +54,9 @@ public Connector create(String catalogName, Map config, Connecto Bootstrap app = new Bootstrap(new 
JsonModule(), new ClpModule(), binder -> { binder.bind(TypeManager.class).toInstance(context.getTypeManager()); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + binder.bind(FunctionMetadataManager.class).toInstance(context.getFunctionMetadataManager()); + binder.bind(StandardFunctionResolution.class).toInstance(context.getStandardFunctionResolution()); + binder.bind(RowExpressionService.class).toInstance(context.getRowExpressionService()); }); Injector injector = app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize(); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 950d63cbc1732..7148c7caf57bb 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -15,17 +15,22 @@ import com.facebook.airlift.log.Logger; import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.expressions.LogicalRowExpressions; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.TableHandle; import com.facebook.presto.spi.VariableAllocator; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterNode; import com.facebook.presto.spi.plan.PlanNode; import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.ConstantExpression; +import com.facebook.presto.spi.relation.DeterminismEvaluator; +import com.facebook.presto.spi.relation.ExpressionOptimizer; import com.facebook.presto.spi.relation.RowExpression; import 
com.facebook.presto.spi.relation.SpecialFormExpression; import io.airlift.slice.Slice; @@ -33,11 +38,26 @@ import java.util.Optional; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; +import static com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED; public class ClpPlanOptimizer implements ConnectorPlanOptimizer { private static final Logger log = Logger.get(ClpPlanOptimizer.class); + private final LogicalRowExpressions logicalRowExpressions; + private final ExpressionOptimizer expressionOptimizer; + + public ClpPlanOptimizer(FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + DeterminismEvaluator determinismEvaluator, + ExpressionOptimizer expressionOptimizer) + { + this.logicalRowExpressions = new LogicalRowExpressions( + determinismEvaluator, + functionResolution, + functionManager); + this.expressionOptimizer = expressionOptimizer; + } private static String getVariableName(String variableName) { @@ -79,13 +99,11 @@ else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; } else if (specialFormExpression.getForm() == SpecialFormExpression.Form.IN) { - CallExpression callExpression = (CallExpression) specialFormExpression.getArguments().get(1); String variableName = getVariableName(specialFormExpression.getArguments().get(0).toString()); StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append(variableName); queryBuilder.append("("); - for (RowExpression argument : callExpression.getArguments() - .subList(1, callExpression.getArguments().size())) { + for (RowExpression argument : specialFormExpression.getArguments() + .subList(1, specialFormExpression.getArguments().size())) { ConstantExpression literal = (ConstantExpression) argument; String literalString = getLiteralString(literal); queryBuilder.append(variableName).append(": "); @@ -141,16 +159,18 @@ public PlanNode 
optimize(PlanNode maxSubplan, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator) { - return rewriteWith(new Rewriter(idAllocator), maxSubplan); + return rewriteWith(new Rewriter(session, idAllocator), maxSubplan); } - private static class Rewriter + private class Rewriter extends ConnectorPlanRewriter { + private final ConnectorSession session; private final PlanNodeIdAllocator idAllocator; - public Rewriter(PlanNodeIdAllocator idAllocator) + public Rewriter(ConnectorSession session, PlanNodeIdAllocator idAllocator) { + this.session = session; this.idAllocator = idAllocator; } @@ -164,7 +184,9 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) TableScanNode tableScanNode = (TableScanNode) node.getSource(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - String query = buildKqlQuery(node.getPredicate()); + RowExpression predicate = expressionOptimizer.optimize(node.getPredicate(), OPTIMIZED, session); + predicate = logicalRowExpressions.convertToConjunctiveNormalForm(predicate); + String query = buildKqlQuery(predicate); log.info("Query: " + query); ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, Optional.of(query)); return new TableScanNode( diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java index 69ae0f9d25f4e..a6deb548a58ee 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java @@ -15,6 +15,10 @@ import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import 
com.facebook.presto.spi.relation.DeterminismEvaluator; +import com.facebook.presto.spi.relation.ExpressionOptimizer; import com.google.common.collect.ImmutableSet; import javax.inject.Inject; @@ -24,9 +28,21 @@ public class ClpPlanOptimizerProvider implements ConnectorPlanOptimizerProvider { + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final DeterminismEvaluator determinismEvaluator; + private final ExpressionOptimizer expressionOptimizer; + @Inject - public ClpPlanOptimizerProvider() + public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + DeterminismEvaluator determinismEvaluator, + ExpressionOptimizer expressionOptimizer) { + this.functionManager = functionManager; + this.functionResolution = functionResolution; + this.determinismEvaluator = determinismEvaluator; + this.expressionOptimizer = expressionOptimizer; } @Override @@ -38,6 +54,9 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpPlanOptimizer()); + return ImmutableSet.of(new ClpPlanOptimizer(functionManager, + functionResolution, + determinismEvaluator, + expressionOptimizer)); } } From 3ceeca0bfa1a9e6caadc8236656913accf46b842 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 28 Jun 2024 01:01:40 -0400 Subject: [PATCH 030/126] add support for cardinality split and like predicates --- .../com/yscope/presto/ClpPlanOptimizer.java | 88 +++++++++++++++++-- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 7148c7caf57bb..77210ac3564d7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -33,12 +33,12 @@ import 
com.facebook.presto.spi.relation.ExpressionOptimizer; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; import io.airlift.slice.Slice; import java.util.Optional; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; -import static com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED; public class ClpPlanOptimizer implements ConnectorPlanOptimizer @@ -76,6 +76,36 @@ private static String getLiteralString(ConstantExpression literal) return literal.toString(); } + private static String handleCardinalitySplit(RowExpression additionalPredicate) + { + CallExpression cardinalityExpression = (CallExpression) additionalPredicate; + if (!(cardinalityExpression.getArguments().size() == 1 && cardinalityExpression.getArguments() + .get(0) + .toString() + .startsWith("SPLIT"))) { + throw new RuntimeException("Unsupported predicate" + cardinalityExpression); + } + + CallExpression splitExpression = (CallExpression) cardinalityExpression.getArguments().get(0); + if (!(splitExpression.getArguments().size() == 3 && splitExpression.getArguments() + .get(0) instanceof VariableReferenceExpression && splitExpression.getArguments() + .get(1) instanceof ConstantExpression && splitExpression.getArguments() + .get(2) instanceof ConstantExpression)) { + throw new RuntimeException("Unsupported predicate" + splitExpression); + } + + VariableReferenceExpression variableReferenceExpression = + (VariableReferenceExpression) splitExpression.getArguments().get(0); + ConstantExpression startExpression = (ConstantExpression) splitExpression.getArguments().get(1); + ConstantExpression endExpression = (ConstantExpression) splitExpression.getArguments().get(2); + if (!(startExpression.getType() == VarcharType.VARCHAR && endExpression.toString().equals("2"))) { + throw new RuntimeException("Unsupported predicate" + splitExpression); + 
} + + String variableName = getVariableName(variableReferenceExpression.toString()); + return variableName + ": \"*" + getLiteralString(startExpression) + "*\""; + } + public static String buildKqlQuery(RowExpression additionalPredicate) { if (additionalPredicate instanceof SpecialFormExpression) { @@ -122,6 +152,53 @@ else if (specialFormExpression.getForm() == SpecialFormExpression.Form.IN) { } else if (additionalPredicate instanceof CallExpression) { CallExpression callExpression = (CallExpression) additionalPredicate; + // Handle "=(CARDINALITY(SPLIT(field, string, 2)), 2)" case specifically + // TODO: Handle it more generically + if (callExpression.getDisplayName().equals("=")) { + if (!(callExpression.getArguments().size() == 2 && callExpression.getArguments() + .get(1).toString().equals("2") && callExpression.getArguments() + .get(0) + .toString() + .startsWith("CARDINALITY"))) { + throw new RuntimeException("Unsupported predicate" + callExpression); + } + + return handleCardinalitySplit(callExpression.getArguments().get(0)); + } + + // Handle "<>(CARDINALITY(field), 2)" case specifically + if (callExpression.getDisplayName().equals("<>") && callExpression.getArguments().size() == 2 && + callExpression.getArguments().get(1).toString().equals("2") && + callExpression.getArguments().get(0).toString().startsWith("CARDINALITY")) { + return "NOT " + handleCardinalitySplit(callExpression.getArguments().get(0)); + } + + // Handle "not" case specifically + if (callExpression.getDisplayName().equals("not")) { + return "NOT(" + buildKqlQuery(callExpression.getArguments().get(0)) + ")"; + } + + // Handle "LIKE(FIELD, CAST(PATTERN))" case specifically + if (callExpression.getDisplayName().equals("LIKE")) { + if (!(callExpression.getArguments().size() == 2 && callExpression.getArguments() + .get(1) + .toString() + .startsWith("CAST"))) { + throw new RuntimeException("Unsupported predicate" + additionalPredicate); + } + + CallExpression castExpression = (CallExpression) 
callExpression.getArguments().get(1); + if (!(castExpression.getArguments().size() == 1 && castExpression.getArguments() + .get(0) instanceof ConstantExpression)) { + throw new RuntimeException("Unsupported predicate" + castExpression); + } + + String variableName = getVariableName(callExpression.getArguments().get(0).toString()); + ConstantExpression literal = (ConstantExpression) castExpression.getArguments().get(0); + String literalString = getLiteralString(literal); + return variableName + ": \"" + literalString.replace("%", "*") + "\""; + } + String variableName = getVariableName(callExpression.getArguments().get(0).toString()); ConstantExpression literal = (ConstantExpression) callExpression.getArguments().get(1); String literalString = getLiteralString(literal); @@ -150,7 +227,7 @@ else if (additionalPredicate instanceof CallExpression) { return variableName + " <= " + literalString; } } - throw new RuntimeException("Unsupported predicate type"); + throw new RuntimeException("Unsupported predicate" + additionalPredicate); } @Override @@ -184,9 +261,10 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) TableScanNode tableScanNode = (TableScanNode) node.getSource(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - RowExpression predicate = expressionOptimizer.optimize(node.getPredicate(), OPTIMIZED, session); - predicate = logicalRowExpressions.convertToConjunctiveNormalForm(predicate); - String query = buildKqlQuery(predicate); + // Remove them temporarily as we cannot handle io.airlift.joni.Regex +// RowExpression predicate = expressionOptimizer.optimize(node.getPredicate(), OPTIMIZED, session); +// predicate = logicalRowExpressions.convertToConjunctiveNormalForm(predicate); + String query = buildKqlQuery(node.getPredicate()); log.info("Query: " + query); ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, 
Optional.of(query)); return new TableScanNode( From c2b997e6cb7f3f1cafbbc6875d517a3f09941354 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 28 Jun 2024 01:02:01 -0400 Subject: [PATCH 031/126] temporary workaround --- .../java/com/facebook/presto/metadata/MetadataManager.java | 3 ++- .../src/main/java/com/facebook/presto/spi/ColumnMetadata.java | 4 ++-- .../main/java/com/facebook/presto/spi/VariableAllocator.java | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java b/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java index 8a2b1ce55dfc8..cc4a1e4c5bc40 100644 --- a/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java +++ b/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java @@ -511,7 +511,8 @@ public Map getColumnHandles(Session session, TableHandle t ImmutableMap.Builder map = ImmutableMap.builder(); for (Entry mapEntry : handles.entrySet()) { - map.put(mapEntry.getKey().toLowerCase(ENGLISH), mapEntry.getValue()); +// map.put(mapEntry.getKey().toLowerCase(ENGLISH), mapEntry.getValue()); + map.put(mapEntry.getKey(), mapEntry.getValue()); } return map.build(); } diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java b/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java index 8d26f39cd0f8e..a25dd41b10b04 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java @@ -25,7 +25,6 @@ import static com.facebook.presto.spi.SchemaUtil.checkNotEmpty; import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; -import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; public class ColumnMetadata @@ -80,7 +79,8 @@ public ColumnMetadata(String name, Type type, boolean nullable, String comment, 
requireNonNull(type, "type is null"); requireNonNull(properties, "properties is null"); - this.name = name.toLowerCase(ENGLISH); +// this.name = name.toLowerCase(ENGLISH); + this.name = name; this.type = type; this.comment = comment; this.extraInfo = extraInfo; diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java b/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java index a88c225d86fb5..fd8ba13174526 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java @@ -27,7 +27,6 @@ import java.util.stream.Collectors; import static java.util.Collections.unmodifiableMap; -import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; public class VariableAllocator @@ -98,7 +97,7 @@ public VariableReferenceExpression newVariable(Optional sourceLo requireNonNull(type, "type is null"); // TODO: workaround for the fact that QualifiedName lowercases parts - nameHint = nameHint.toLowerCase(ENGLISH); +// nameHint = nameHint.toLowerCase(ENGLISH); // don't strip the tail if the only _ is the first character int index = nameHint.lastIndexOf("_"); From 3e510f5526e0c12156020fa3ff57cc54ff6fcb2f Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 18 Jul 2024 23:23:06 -0400 Subject: [PATCH 032/126] update clp connector version --- presto-clp/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index d0a4fea2ccdff..11a1d8bfef5e5 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -6,7 +6,7 @@ com.facebook.presto presto-root - 0.288-SNAPSHOT + 0.289-SNAPSHOT com.yscope.presto From 476ed3cdf17bd3c43295a2adbb5608e8fc8438e1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 25 Jul 2024 19:56:57 -0400 Subject: [PATCH 033/126] add new files --- .../dependency-reduced-pom.xml | 89 ++++++++ presto-cli/dependency-reduced-pom.xml | 111 +++++++++ 
.../dependency-reduced-pom.xml | 68 ++++++ .../dependency-reduced-pom.xml | 57 +++++ presto-verifier/dependency-reduced-pom.xml | 216 ++++++++++++++++++ 5 files changed, 541 insertions(+) create mode 100644 presto-benchmark-driver/dependency-reduced-pom.xml create mode 100644 presto-cli/dependency-reduced-pom.xml create mode 100644 presto-product-tests/dependency-reduced-pom.xml create mode 100644 presto-testing-server-launcher/dependency-reduced-pom.xml create mode 100644 presto-verifier/dependency-reduced-pom.xml diff --git a/presto-benchmark-driver/dependency-reduced-pom.xml b/presto-benchmark-driver/dependency-reduced-pom.xml new file mode 100644 index 0000000000000..1ab11a3a4ff48 --- /dev/null +++ b/presto-benchmark-driver/dependency-reduced-pom.xml @@ -0,0 +1,89 @@ + + + + presto-root + com.facebook.presto + 0.289-SNAPSHOT + + 4.0.0 + presto-benchmark-driver + presto-benchmark-driver + + + + maven-shade-plugin + + + package + + shade + + + true + executable + + + + ${main-class} + + + + + + + + + org.skife.maven + really-executable-jar-maven-plugin + + + package + + really-executable-jar + + + + + -Xmx1G + executable + + + + + + + com.facebook.presto + presto-testng-services + 0.289-SNAPSHOT + test + + + org.testng + testng + 7.5 + test + + + junit + junit + + + guice + com.google.inject + + + jcommander + com.beust + + + jquery + org.webjars + + + + + + ${project.parent.basedir} + com.facebook.presto.benchmark.driver.PrestoBenchmarkDriver + + diff --git a/presto-cli/dependency-reduced-pom.xml b/presto-cli/dependency-reduced-pom.xml new file mode 100644 index 0000000000000..bfdb7afcff90d --- /dev/null +++ b/presto-cli/dependency-reduced-pom.xml @@ -0,0 +1,111 @@ + + + + presto-root + com.facebook.presto + 0.289-SNAPSHOT + + 4.0.0 + presto-cli + presto-cli + + + + maven-shade-plugin + + + package + + shade + + + true + executable + + + + ${main-class} + + + + + + + + + org.basepom.maven + duplicate-finder-maven-plugin + + + + org.fusesource.jansi + jansi + + + 
+ + + org.skife.maven + really-executable-jar-maven-plugin + + + package + + really-executable-jar + + + + + -Xmx1G + executable + + + + + + + org.testng + testng + 7.5 + test + + + junit + junit + + + guice + com.google.inject + + + jcommander + com.beust + + + jquery + org.webjars + + + + + com.squareup.okhttp3 + mockwebserver + 3.9.0 + test + + + bcprov-jdk15on + org.bouncycastle + + + junit + junit + + + + + + ${project.parent.basedir} + com.facebook.presto.cli.Presto + + diff --git a/presto-product-tests/dependency-reduced-pom.xml b/presto-product-tests/dependency-reduced-pom.xml new file mode 100644 index 0000000000000..26cdb4dc7cb46 --- /dev/null +++ b/presto-product-tests/dependency-reduced-pom.xml @@ -0,0 +1,68 @@ + + + + presto-root + com.facebook.presto + 0.289-SNAPSHOT + + 4.0.0 + presto-product-tests + presto-product-tests + + + + true + src/main/resources + + presto.env + + + + src/main/resources + + presto.env + + + + + + maven-shade-plugin + + + package + + shade + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + true + executable + + + + ${main-class} + + + + + + + + + + + 2.12.2 + ${project.parent.basedir} + true + com.facebook.presto.tests.TemptoProductTestRunner + + diff --git a/presto-testing-server-launcher/dependency-reduced-pom.xml b/presto-testing-server-launcher/dependency-reduced-pom.xml new file mode 100644 index 0000000000000..79f8bf68bda4b --- /dev/null +++ b/presto-testing-server-launcher/dependency-reduced-pom.xml @@ -0,0 +1,57 @@ + + + + presto-root + com.facebook.presto + 0.289-SNAPSHOT + + 4.0.0 + presto-testing-server-launcher + presto-testing-server-launcher + + + + maven-shade-plugin + + + package + + shade + + + true + executable + + + + ${main-class} + + + + + + + + + org.skife.maven + really-executable-jar-maven-plugin + + + package + + really-executable-jar + + + + + -Xmx1G + executable + + + + + + ${project.parent.basedir} + com.facebook.presto.server.testing.TestingPrestoServerLauncher + + diff 
--git a/presto-verifier/dependency-reduced-pom.xml b/presto-verifier/dependency-reduced-pom.xml new file mode 100644 index 0000000000000..0e41ca9358ac3 --- /dev/null +++ b/presto-verifier/dependency-reduced-pom.xml @@ -0,0 +1,216 @@ + + + + presto-root + com.facebook.presto + 0.289-SNAPSHOT + + 4.0.0 + presto-verifier + presto-verifier + + + + maven-shade-plugin + + + package + + shade + + + true + executable + + + + + ${main-class} + + + + + + + + + maven-dependency-plugin + + + unpack-launcher + prepare-package + + unpack-dependencies + + + false + launcher + provided + ${project.build.directory}/dependency/launcher + + + + + + maven-assembly-plugin + + + bin + package + + single + + + + tar.gz + + + src/main/assembly/presto-verifier.xml + + presto-verifier-${project.version} + false + + + + + + + + + org.jetbrains + annotations + 19.0.0 + provided + + + com.facebook.presto + presto-main + 0.289-SNAPSHOT + test-jar + test + + + com.facebook.presto + presto-memory + 0.289-SNAPSHOT + test + + + com.facebook.presto + presto-tests + 0.289-SNAPSHOT + test + + + presto-blackhole + com.facebook.presto + + + presto-function-namespace-managers + com.facebook.presto + + + tpch + io.airlift.tpch + + + h2 + com.h2database + + + + + com.facebook.presto + presto-tpch + 0.289-SNAPSHOT + test + + + tpch + io.airlift.tpch + + + + + com.facebook.airlift + testing + 0.209 + test + + + com.facebook.presto + testing-mysql-server-5 + 0.6 + test + + + com.facebook.presto + testing-mysql-server-base + 0.6 + test + + + command + io.airlift + + + + + org.testng + testng + 7.5 + test + + + junit + junit + + + guice + com.google.inject + + + jcommander + com.beust + + + jquery + org.webjars + + + + + org.assertj + assertj-core + 3.8.0 + test + + + com.facebook.presto + presto-testng-services + 0.289-SNAPSHOT + test + + + com.facebook.airlift + launcher + 0.209 + tar.gz + bin + provided + + + com.facebook.airlift + launcher + 0.209 + tar.gz + properties + provided + + + + 
${project.artifactId} + ${project.parent.basedir} + com.facebook.presto.verifier.PrestoVerifier + + From 22845ee443380363e6ef88a70ffa6578ca05b263 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 25 Jul 2024 19:58:40 -0400 Subject: [PATCH 034/126] avoid generating weird LIKE predicates --- .../sql/relational/SqlToRowExpressionTranslator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java b/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java index c33430616a779..3c36739dd9f26 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java @@ -914,10 +914,10 @@ protected RowExpression visitLikePredicate(LikePredicate node, Context context) return likeFunctionCall(value, call(getSourceLocation(node), "LIKE_PATTERN", functionResolution.likePatternFunction(), LIKE_PATTERN, pattern, escape)); } - RowExpression prefixOrSuffixMatch = generateLikePrefixOrSuffixMatch(value, pattern); - if (prefixOrSuffixMatch != null) { - return prefixOrSuffixMatch; - } +// RowExpression prefixOrSuffixMatch = generateLikePrefixOrSuffixMatch(value, pattern); +// if (prefixOrSuffixMatch != null) { +// return prefixOrSuffixMatch; +// } if (!functionResolution.supportsLikePatternFunction()) { return likeFunctionCall(value, pattern); From e9050b93366ce625a18f0dd4d0a73f39cdc3c47b Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 25 Jul 2024 23:28:46 -0400 Subject: [PATCH 035/126] a more robust ClpPlanOptimizer --- presto-clp/pom.xml | 5 - .../java/com/yscope/presto/ClpConnector.java | 11 +- .../java/com/yscope/presto/ClpErrorCode.java | 39 +++ .../java/com/yscope/presto/ClpExpression.java | 50 ++++ .../presto/ClpFilterToKqlConverter.java | 250 ++++++++++++++++++ 
.../com/yscope/presto/ClpPlanOptimizer.java | 230 ++-------------- .../presto/ClpPlanOptimizerProvider.java | 17 +- .../yscope/presto/TestClpRecordCursor.java | 9 +- 8 files changed, 386 insertions(+), 225 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpExpression.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 11a1d8bfef5e5..2174a44ba13a5 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -94,11 +94,6 @@ provided - - com.facebook.presto - presto-expressions - - io.airlift units diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java index 7d9cdaeb9c7b0..25b3c9e6f241c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java @@ -15,6 +15,7 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; +import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; @@ -42,6 +43,7 @@ public class ClpConnector private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; private final RowExpressionService rowExpressionService; + private final TypeManager typeManager; @Inject public ClpConnector(LifeCycleManager lifeCycleManager, @@ -50,7 +52,8 @@ public ClpConnector(LifeCycleManager lifeCycleManager, ClpRecordSetProvider recordSetProvider, FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, - RowExpressionService rowExpressionService) + RowExpressionService rowExpressionService, + 
TypeManager typeManager) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); @@ -59,15 +62,13 @@ public ClpConnector(LifeCycleManager lifeCycleManager, this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null"); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(functionManager, - functionResolution, - rowExpressionService.getDeterminismEvaluator(), - rowExpressionService.getExpressionOptimizer()); + return new ClpPlanOptimizerProvider(functionManager, functionResolution, typeManager); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java new file mode 100644 index 0000000000000..86c30792280d7 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java @@ -0,0 +1,39 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.common.ErrorCode; +import com.facebook.presto.common.ErrorType; +import com.facebook.presto.spi.ErrorCodeSupplier; + +import static com.facebook.presto.common.ErrorType.EXTERNAL; + +public enum ClpErrorCode + implements ErrorCodeSupplier +{ + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION(0, EXTERNAL); + + private final ErrorCode errorCode; + + ClpErrorCode(int code, ErrorType type) + { + errorCode = new ErrorCode(code + 0x0400_0000, name(), type); + } + + @Override + public ErrorCode toErrorCode() + { + return errorCode; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java b/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java new file mode 100644 index 0000000000000..64fde925be6a3 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java @@ -0,0 +1,50 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.spi.relation.RowExpression; + +import java.util.Optional; + +public class ClpExpression +{ + private final Optional definition; + private final Optional remainingExpression; + + public ClpExpression(Optional definition, Optional remainingExpression) + { + this.definition = definition; + this.remainingExpression = remainingExpression; + } + + public ClpExpression(String definition) + { + this(Optional.of(definition), Optional.empty()); + } + + public ClpExpression(RowExpression remainingExpression) + { + this(Optional.empty(), Optional.of(remainingExpression)); + } + + public Optional getDefinition() + { + return definition; + } + + public Optional getRemainingExpression() + { + return remainingExpression; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java new file mode 100644 index 0000000000000..46c84c36dfc3c --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -0,0 +1,250 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.yscope.presto; + +import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.function.FunctionHandle; +import com.facebook.presto.spi.function.FunctionMetadata; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.ConstantExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.RowExpressionVisitor; +import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.google.common.collect.ImmutableSet; +import io.airlift.slice.Slice; + +import java.util.ArrayList; +import java.util.Optional; +import java.util.Set; + +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; +import static com.yscope.presto.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; +import static java.util.Objects.requireNonNull; + +public class ClpFilterToKqlConverter + implements RowExpressionVisitor +{ + private static final Set LOGICAL_BINARY_OPS_FILTER = ImmutableSet.of("=", "<", "<=", ">", ">=", "<>"); + + private final StandardFunctionResolution standardFunctionResolution; + private final FunctionMetadataManager functionMetadataManager; + private final TypeManager typeManager; + + public ClpFilterToKqlConverter(StandardFunctionResolution standardFunctionResolution, + FunctionMetadataManager functionMetadataManager, + TypeManager typeManager) + { + this.standardFunctionResolution = + requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); + this.functionMetadataManager = 
requireNonNull(functionMetadataManager, "function metadata manager is null"); + this.typeManager = requireNonNull(typeManager, "type manager is null"); + } + + private static String getLiteralString(ConstantExpression literal) + { + if (literal.getValue() instanceof Slice) { + return ((Slice) literal.getValue()).toStringUtf8(); + } + return literal.toString(); + } + + private static String getVariableName(VariableReferenceExpression variable) + { + String variableName = variable.getName(); + if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || + variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { + return variableName.substring(0, variableName.lastIndexOf('_')); + } + return variableName; + } + + private ClpExpression handleNot(CallExpression node) + { + if (node.getArguments().size() != 1) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "NOT operator must have exactly one argument. Received: " + node); + } + + RowExpression input = node.getArguments().get(0); + ClpExpression expression = input.accept(this, null); + if (expression.getRemainingExpression().isPresent() || !expression.getDefinition().isPresent()) { + return new ClpExpression(node); + } + return new ClpExpression("NOT " + expression.getDefinition().get()); + } + + private ClpExpression handleAnd(SpecialFormExpression node) + { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + ArrayList remainingExpressions = new ArrayList<>(); + for (RowExpression argument : node.getArguments()) { + ClpExpression expression = argument.accept(this, null); + if (expression.getRemainingExpression().isPresent() || !expression.getDefinition().isPresent()) { + remainingExpressions.add(expression.getRemainingExpression().get()); + continue; + } + queryBuilder.append(expression.getDefinition()); + queryBuilder.append(" AND "); + } + if (remainingExpressions.size() == node.getArguments().size()) { + return new ClpExpression(node); 
+ } + else if (!remainingExpressions.isEmpty()) { + return new ClpExpression(Optional.of(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"), + Optional.of(new SpecialFormExpression(node.getSourceLocation(), + AND, + BOOLEAN, + remainingExpressions))); + } + return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"); + } + + private ClpExpression handleOr(SpecialFormExpression node) + { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + ArrayList remainingExpressions = new ArrayList<>(); + for (RowExpression argument : node.getArguments()) { + ClpExpression expression = argument.accept(this, null); + if (expression.getRemainingExpression().isPresent()) { + return new ClpExpression(node); + } + queryBuilder.append(expression.getDefinition()); + queryBuilder.append(" OR "); + } + return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); + } + + private ClpExpression handleIn(SpecialFormExpression node) + { + if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { + return new ClpExpression(node); + } + String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("("); + for (RowExpression argument : node.getArguments().subList(1, node.getArguments().size())) { + if (!(argument instanceof ConstantExpression)) { + return new ClpExpression(node); + } + ConstantExpression literal = (ConstantExpression) argument; + String literalString = getLiteralString(literal); + queryBuilder.append(variableName).append(": "); + if (literal.getType().equals(VarcharType.VARCHAR)) { + queryBuilder.append("\""); + queryBuilder.append(literalString); + queryBuilder.append("\""); + } + else { + queryBuilder.append(literalString); + } + queryBuilder.append(" OR "); + } + return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); + } + + private 
ClpExpression handleLogicalBinary(String operator, CallExpression node) + { + if (node.getArguments().size() != 2) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Logical binary operator must have exactly two arguments. Received: " + node); + } + + if (!(node.getArguments().get(0) instanceof VariableReferenceExpression) || + !(node.getArguments().get(1) instanceof ConstantExpression)) { + return new ClpExpression(node); + } + + String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); + ConstantExpression literal = (ConstantExpression) node.getArguments().get(1); + String literalString = getLiteralString(literal); + if (operator.equals("=")) { + if (literal.getType().equals(VarcharType.VARCHAR)) { + return new ClpExpression(variableName + ": \"" + literalString + "\""); + } + else { + return new ClpExpression(variableName + ": " + literalString); + } + } + else if (operator.equals("<>")) { + if (literal.getType().equals(VarcharType.VARCHAR)) { + return new ClpExpression("NOT " + variableName + ": \"" + literalString + "\""); + } + else { + return new ClpExpression("NOT " + variableName + ": " + literalString); + } + } + else if (LOGICAL_BINARY_OPS_FILTER.contains(operator)) { + return new ClpExpression(variableName + " " + operator + " " + literalString); + } + else { + return new ClpExpression(node); + } + } + + @Override + public ClpExpression visitCall(CallExpression node, Void context) + { + FunctionHandle functionHandle = node.getFunctionHandle(); + if (standardFunctionResolution.isNotFunction(functionHandle)) { + return handleNot(node); + } + + FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(node.getFunctionHandle()); + Optional operatorTypeOptional = functionMetadata.getOperatorType(); + if (operatorTypeOptional.isPresent()) { + OperatorType operatorType = operatorTypeOptional.get(); + if (operatorType.isComparisonOperator()) { + return 
handleLogicalBinary(operatorType.getOperator(), node); + } + } + + return new ClpExpression(node); + } + + @Override + public ClpExpression visitConstant(ConstantExpression node, Void context) + { + return new ClpExpression(getLiteralString(node)); + } + + @Override + public ClpExpression visitVariableReference(VariableReferenceExpression node, Void context) + { + return new ClpExpression(getVariableName(node)); + } + + @Override + public ClpExpression visitSpecialForm(SpecialFormExpression node, Void context) + { + switch (node.getForm()) { + case AND: + return handleAnd(node); + case OR: + return handleOr(node); + case IN: + return handleIn(node); + default: + return new ClpExpression(node); + } + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 77210ac3564d7..2a338d85dee1a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -14,8 +14,7 @@ package com.yscope.presto; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.VarcharType; -import com.facebook.presto.expressions.LogicalRowExpressions; +import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; @@ -27,14 +26,7 @@ import com.facebook.presto.spi.plan.PlanNode; import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.TableScanNode; -import com.facebook.presto.spi.relation.CallExpression; -import com.facebook.presto.spi.relation.ConstantExpression; -import com.facebook.presto.spi.relation.DeterminismEvaluator; -import com.facebook.presto.spi.relation.ExpressionOptimizer; import com.facebook.presto.spi.relation.RowExpression; -import com.facebook.presto.spi.relation.SpecialFormExpression; -import 
com.facebook.presto.spi.relation.VariableReferenceExpression; -import io.airlift.slice.Slice; import java.util.Optional; @@ -44,190 +36,17 @@ public class ClpPlanOptimizer implements ConnectorPlanOptimizer { private static final Logger log = Logger.get(ClpPlanOptimizer.class); - private final LogicalRowExpressions logicalRowExpressions; - private final ExpressionOptimizer expressionOptimizer; + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final TypeManager typeManager; public ClpPlanOptimizer(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, - DeterminismEvaluator determinismEvaluator, - ExpressionOptimizer expressionOptimizer) + TypeManager typeManager) { - this.logicalRowExpressions = new LogicalRowExpressions( - determinismEvaluator, - functionResolution, - functionManager); - this.expressionOptimizer = expressionOptimizer; - } - - private static String getVariableName(String variableName) - { - if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || - variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { - return variableName.substring(0, variableName.lastIndexOf('_')); - } - return variableName; - } - - private static String getLiteralString(ConstantExpression literal) - { - if (literal.getValue() instanceof Slice) { - return ((Slice) literal.getValue()).toStringUtf8(); - } - return literal.toString(); - } - - private static String handleCardinalitySplit(RowExpression additionalPredicate) - { - CallExpression cardinalityExpression = (CallExpression) additionalPredicate; - if (!(cardinalityExpression.getArguments().size() == 1 && cardinalityExpression.getArguments() - .get(0) - .toString() - .startsWith("SPLIT"))) { - throw new RuntimeException("Unsupported predicate" + cardinalityExpression); - } - - CallExpression splitExpression = (CallExpression) cardinalityExpression.getArguments().get(0); - if 
(!(splitExpression.getArguments().size() == 3 && splitExpression.getArguments() - .get(0) instanceof VariableReferenceExpression && splitExpression.getArguments() - .get(1) instanceof ConstantExpression && splitExpression.getArguments() - .get(2) instanceof ConstantExpression)) { - throw new RuntimeException("Unsupported predicate" + splitExpression); - } - - VariableReferenceExpression variableReferenceExpression = - (VariableReferenceExpression) splitExpression.getArguments().get(0); - ConstantExpression startExpression = (ConstantExpression) splitExpression.getArguments().get(1); - ConstantExpression endExpression = (ConstantExpression) splitExpression.getArguments().get(2); - if (!(startExpression.getType() == VarcharType.VARCHAR && endExpression.toString().equals("2"))) { - throw new RuntimeException("Unsupported predicate" + splitExpression); - } - - String variableName = getVariableName(variableReferenceExpression.toString()); - return variableName + ": \"*" + getLiteralString(startExpression) + "*\""; - } - - public static String buildKqlQuery(RowExpression additionalPredicate) - { - if (additionalPredicate instanceof SpecialFormExpression) { - SpecialFormExpression specialFormExpression = (SpecialFormExpression) additionalPredicate; - if (specialFormExpression.getForm() == SpecialFormExpression.Form.AND) { - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); - for (RowExpression argument : specialFormExpression.getArguments()) { - queryBuilder.append(buildKqlQuery(argument)); - queryBuilder.append(" AND "); - } - return queryBuilder.substring(0, queryBuilder.length() - 5) + ")"; - } - else if (specialFormExpression.getForm() == SpecialFormExpression.Form.OR) { - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); - for (RowExpression argument : specialFormExpression.getArguments()) { - queryBuilder.append(buildKqlQuery(argument)); - queryBuilder.append(" OR "); - } - return queryBuilder.substring(0, 
queryBuilder.length() - 4) + ")"; - } - else if (specialFormExpression.getForm() == SpecialFormExpression.Form.IN) { - String variableName = getVariableName(specialFormExpression.getArguments().get(0).toString()); - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); - for (RowExpression argument : specialFormExpression.getArguments() - .subList(1, specialFormExpression.getArguments().size())) { - ConstantExpression literal = (ConstantExpression) argument; - String literalString = getLiteralString(literal); - queryBuilder.append(variableName).append(": "); - if (literal.getType().equals(VarcharType.VARCHAR)) { - queryBuilder.append("\""); - queryBuilder.append(literalString); - queryBuilder.append("\""); - } - else { - queryBuilder.append(literalString); - } - queryBuilder.append(" OR "); - } - return queryBuilder.substring(0, queryBuilder.length() - 4) + ")"; - } - } - else if (additionalPredicate instanceof CallExpression) { - CallExpression callExpression = (CallExpression) additionalPredicate; - // Handle "=(CARDINALITY(SPLIT(field, string, 2)), 2)" case specifically - // TODO: Handle it more generically - if (callExpression.getDisplayName().equals("=")) { - if (!(callExpression.getArguments().size() == 2 && callExpression.getArguments() - .get(1).toString().equals("2") && callExpression.getArguments() - .get(0) - .toString() - .startsWith("CARDINALITY"))) { - throw new RuntimeException("Unsupported predicate" + callExpression); - } - - return handleCardinalitySplit(callExpression.getArguments().get(0)); - } - - // Handle "<>(CARDINALITY(field), 2)" case specifically - if (callExpression.getDisplayName().equals("<>") && callExpression.getArguments().size() == 2 && - callExpression.getArguments().get(1).toString().equals("2") && - callExpression.getArguments().get(0).toString().startsWith("CARDINALITY")) { - return "NOT " + handleCardinalitySplit(callExpression.getArguments().get(0)); - } - - // Handle "not" case specifically - if 
(callExpression.getDisplayName().equals("not")) { - return "NOT(" + buildKqlQuery(callExpression.getArguments().get(0)) + ")"; - } - - // Handle "LIKE(FIELD, CAST(PATTERN))" case specifically - if (callExpression.getDisplayName().equals("LIKE")) { - if (!(callExpression.getArguments().size() == 2 && callExpression.getArguments() - .get(1) - .toString() - .startsWith("CAST"))) { - throw new RuntimeException("Unsupported predicate" + additionalPredicate); - } - - CallExpression castExpression = (CallExpression) callExpression.getArguments().get(1); - if (!(castExpression.getArguments().size() == 1 && castExpression.getArguments() - .get(0) instanceof ConstantExpression)) { - throw new RuntimeException("Unsupported predicate" + castExpression); - } - - String variableName = getVariableName(callExpression.getArguments().get(0).toString()); - ConstantExpression literal = (ConstantExpression) castExpression.getArguments().get(0); - String literalString = getLiteralString(literal); - return variableName + ": \"" + literalString.replace("%", "*") + "\""; - } - - String variableName = getVariableName(callExpression.getArguments().get(0).toString()); - ConstantExpression literal = (ConstantExpression) callExpression.getArguments().get(1); - String literalString = getLiteralString(literal); - switch (callExpression.getDisplayName()) { - case "EQUAL": - if (literal.getType().equals(VarcharType.VARCHAR)) { - return variableName + ": \"" + literalString + "\""; - } - else { - return variableName + ": " + literalString; - } - case "<>": - if (literal.getType().equals(VarcharType.VARCHAR)) { - return "NOT " + variableName + ": \"" + literalString + "\""; - } - else { - return "NOT " + variableName + ": " + literalString; - } - case "GREATER_THAN": - return variableName + " > " + literalString; - case "GREATER_THAN_OR_EQUAL": - return variableName + " >= " + literalString; - case "LESS_THAN": - return variableName + " < " + literalString; - case "LESS_THAN_OR_EQUAL": - return 
variableName + " <= " + literalString; - } - } - throw new RuntimeException("Unsupported predicate" + additionalPredicate); + this.functionManager = functionManager; + this.functionResolution = functionResolution; + this.typeManager = typeManager; } @Override @@ -236,18 +55,16 @@ public PlanNode optimize(PlanNode maxSubplan, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator) { - return rewriteWith(new Rewriter(session, idAllocator), maxSubplan); + return rewriteWith(new Rewriter(idAllocator), maxSubplan); } private class Rewriter extends ConnectorPlanRewriter { - private final ConnectorSession session; private final PlanNodeIdAllocator idAllocator; - public Rewriter(ConnectorSession session, PlanNodeIdAllocator idAllocator) + public Rewriter(PlanNodeIdAllocator idAllocator) { - this.session = session; this.idAllocator = idAllocator; } @@ -261,14 +78,17 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) TableScanNode tableScanNode = (TableScanNode) node.getSource(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - // Remove them temporarily as we cannot handle io.airlift.joni.Regex -// RowExpression predicate = expressionOptimizer.optimize(node.getPredicate(), OPTIMIZED, session); -// predicate = logicalRowExpressions.convertToConjunctiveNormalForm(predicate); - String query = buildKqlQuery(node.getPredicate()); - log.info("Query: " + query); - ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, Optional.of(query)); - return new TableScanNode( - node.getSourceLocation(), + ClpExpression clpExpression = node.getPredicate() + .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, typeManager), null); + Optional kqlQuery = clpExpression.getDefinition(); + Optional remainingPredicate = clpExpression.getRemainingExpression(); + if (!kqlQuery.isPresent()) { + return node; + } + log.info("KQL 
query: %s", kqlQuery.get()); + ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery); + TableScanNode newTableScanNode = new TableScanNode( + tableScanNode.getSourceLocation(), idAllocator.getNextId(), new TableHandle( tableHandle.getConnectorId(), @@ -280,6 +100,14 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) tableScanNode.getTableConstraints(), tableScanNode.getCurrentConstraint(), tableScanNode.getEnforcedConstraint()); + if (!remainingPredicate.isPresent()) { + return newTableScanNode; + } + + return new FilterNode(node.getSourceLocation(), + idAllocator.getNextId(), + newTableScanNode, + remainingPredicate.get()); } } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java index a6deb548a58ee..73c5c695aa0f0 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java @@ -13,12 +13,11 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; import com.facebook.presto.spi.function.FunctionMetadataManager; import com.facebook.presto.spi.function.StandardFunctionResolution; -import com.facebook.presto.spi.relation.DeterminismEvaluator; -import com.facebook.presto.spi.relation.ExpressionOptimizer; import com.google.common.collect.ImmutableSet; import javax.inject.Inject; @@ -30,19 +29,16 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final DeterminismEvaluator determinismEvaluator; - private final ExpressionOptimizer expressionOptimizer; + private final TypeManager typeManager; @Inject public 
ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, - DeterminismEvaluator determinismEvaluator, - ExpressionOptimizer expressionOptimizer) + TypeManager typeManager) { this.functionManager = functionManager; this.functionResolution = functionResolution; - this.determinismEvaluator = determinismEvaluator; - this.expressionOptimizer = expressionOptimizer; + this.typeManager = typeManager; } @Override @@ -54,9 +50,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpPlanOptimizer(functionManager, - functionResolution, - determinismEvaluator, - expressionOptimizer)); + return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution, typeManager)); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 2917e01007cfa..247e97ab55686 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -17,6 +17,7 @@ import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.relational.FunctionResolution; import com.google.common.collect.ImmutableList; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -127,12 +128,16 @@ public void testPredicate() "a_bigint", BigintType.BIGINT), constant(1L, BigintType.BIGINT))); - String query = ClpPlanOptimizer.buildKqlQuery(callExpression); + Optional query = + callExpression.accept(new ClpFilterToKqlConverter(new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), + functionAndTypeManager, + functionAndTypeManager), null).getDefinition(); + assertTrue(query.isPresent()); 
ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_1_table", Optional.of(query)), + new ClpSplit("default", "test_1_table", query), new ArrayList<>(clpClient.listColumns("test_1_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); From fc7a4977287872b18035f9ea14e4c106c64b7ef6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 26 Jul 2024 09:15:36 -0400 Subject: [PATCH 036/126] fix a bug and add LIKE predicate handler --- .../presto/ClpFilterToKqlConverter.java | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 46c84c36dfc3c..57eae149e2be6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -102,7 +102,7 @@ private ClpExpression handleAnd(SpecialFormExpression node) remainingExpressions.add(expression.getRemainingExpression().get()); continue; } - queryBuilder.append(expression.getDefinition()); + queryBuilder.append(expression.getDefinition().get()); queryBuilder.append(" AND "); } if (remainingExpressions.size() == node.getArguments().size()) { @@ -125,10 +125,10 @@ private ClpExpression handleOr(SpecialFormExpression node) ArrayList remainingExpressions = new ArrayList<>(); for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); - if (expression.getRemainingExpression().isPresent()) { + if (expression.getRemainingExpression().isPresent() || !expression.getDefinition().isPresent()) { return new ClpExpression(node); } - queryBuilder.append(expression.getDefinition()); + 
queryBuilder.append(expression.getDefinition().get()); queryBuilder.append(" OR "); } return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); @@ -162,6 +162,44 @@ private ClpExpression handleIn(SpecialFormExpression node) return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); } + private ClpExpression handleLike(CallExpression node) + { + if (node.getArguments().size() != 2) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "LIKE operator must have exactly two arguments. Received: " + node); + } + + if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { + return new ClpExpression(node); + } + + String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); + RowExpression argument = node.getArguments().get(1); + if (argument instanceof ConstantExpression) { + ConstantExpression literal = (ConstantExpression) argument; + String literalString = getLiteralString(literal); + return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*") + "\""); + } + else if (argument instanceof CallExpression) { + CallExpression callExpression = (CallExpression) argument; + FunctionHandle functionHandle = callExpression.getFunctionHandle(); + if (!standardFunctionResolution.isCastFunction(functionHandle)) { + return new ClpExpression(node); + } + if (callExpression.getArguments().size() != 1) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "CAST function must have exactly one argument. 
Received: " + callExpression); + } + if (!(callExpression.getArguments().get(0) instanceof ConstantExpression)) { + return new ClpExpression(node); + } + ConstantExpression literal = (ConstantExpression) callExpression.getArguments().get(0); + String literalString = getLiteralString(literal); + return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*") + "\""); + } + return new ClpExpression(node); + } + private ClpExpression handleLogicalBinary(String operator, CallExpression node) { if (node.getArguments().size() != 2) { @@ -209,6 +247,10 @@ public ClpExpression visitCall(CallExpression node, Void context) return handleNot(node); } + if (standardFunctionResolution.isLikeFunction(functionHandle)) { + return handleLike(node); + } + FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(node.getFunctionHandle()); Optional operatorTypeOptional = functionMetadata.getOperatorType(); if (operatorTypeOptional.isPresent()) { From 5346beb50e4a9608fcba7bb6d2f3aaef11b920a1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 26 Jul 2024 09:41:49 -0400 Subject: [PATCH 037/126] fix a bug --- .../yscope/presto/ClpFilterToKqlConverter.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 57eae149e2be6..9597df48b4223 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -109,11 +109,17 @@ private ClpExpression handleAnd(SpecialFormExpression node) return new ClpExpression(node); } else if (!remainingExpressions.isEmpty()) { - return new ClpExpression(Optional.of(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"), - Optional.of(new SpecialFormExpression(node.getSourceLocation(), - AND, - BOOLEAN, - remainingExpressions))); + if 
(remainingExpressions.size() == 1) { + return new ClpExpression(Optional.of(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"), + Optional.of(remainingExpressions.get(0))); + } + else { + return new ClpExpression(Optional.of(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"), + Optional.of(new SpecialFormExpression(node.getSourceLocation(), + AND, + BOOLEAN, + remainingExpressions))); + } } return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"); } From 323150701291eb034784e965518a8e159c0708f7 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 26 Jul 2024 19:24:04 -0400 Subject: [PATCH 038/126] add array support --- .../main/java/com/yscope/presto/ClpClient.java | 5 ++++- .../java/com/yscope/presto/ClpRecordCursor.java | 17 +++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index d6a16cc96001b..6cb0f79ffbf48 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -14,6 +14,7 @@ package com.yscope.presto; import com.facebook.airlift.log.Logger; +import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; @@ -327,10 +328,12 @@ private Set parseSchemaTreeFile(Path schemaMapsFile) case ClpString: case VarString: case DateString: - case UnstructuredArray: case NullValue: prestoType = VarcharType.VARCHAR; break; + case UnstructuredArray: + prestoType = new ArrayType(VarcharType.VARCHAR); + break; case Boolean: prestoType = BooleanType.BOOLEAN; break; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index da12db9501dde..ceee661c8921f 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -13,6 +13,7 @@ */ package com.yscope.presto; +import com.facebook.presto.common.block.BlockBuilder; import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.RecordCursor; import com.fasterxml.jackson.databind.JsonNode; @@ -121,17 +122,21 @@ public Slice getSlice(int field) { checkFieldType(field, createUnboundedVarcharType()); JsonNode node = fields.get(field); - if (node.isArray()) { - return Slices.utf8Slice(node.toString()); - } - else { - return Slices.utf8Slice(node.asText()); - } + return Slices.utf8Slice(node.asText()); } @Override public Object getObject(int field) { + JsonNode node = fields.get(field); + if (node.isArray()) { + BlockBuilder builder = VARCHAR.createBlockBuilder(null, node.size()); + Iterator elements = node.elements(); + while (elements.hasNext()) { + VARCHAR.writeString(builder, elements.next().asText()); + } + return builder.build(); + } throw new UnsupportedOperationException(); } From a1511f1b8b6a68c3f343fe1ace5d48ee890571bc Mon Sep 17 00:00:00 2001 From: wraymo Date: Sat, 27 Jul 2024 16:16:50 -0400 Subject: [PATCH 039/126] Add a unit test --- .../presto/TestClpFilterToKqlConverter.java | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java b/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java new file mode 100644 index 0000000000000..2df12ca6008ae --- /dev/null +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java @@ -0,0 +1,115 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.metadata.FunctionAndTypeManager; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.analyzer.FunctionAndTypeResolver; +import com.facebook.presto.sql.relational.FunctionResolution; +import com.google.common.collect.ImmutableList; +import io.airlift.slice.Slices; +import org.testng.annotations.Test; + +import java.util.Optional; + +import static com.facebook.presto.common.function.OperatorType.EQUAL; +import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.metadata.CastType.CAST; +import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; +import static com.facebook.presto.sql.relational.Expressions.call; +import static com.facebook.presto.sql.relational.Expressions.constant; +import static com.facebook.presto.type.LikePatternType.LIKE_PATTERN; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class 
TestClpFilterToKqlConverter +{ + @Test + public void testSqlToKqlConverter() + { + FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager(); + FunctionResolution functionResolution = + new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); + FunctionAndTypeResolver functionAndTypeResolver = functionAndTypeManager.getFunctionAndTypeResolver(); + // (a > 0 OR b like 'b%') AND (lower(c.e) = 'hello world' OR c IS NULL) + SpecialFormExpression firstOrExpression = + new SpecialFormExpression(SpecialFormExpression.Form.OR, + BOOLEAN, + new CallExpression(GREATER_THAN.name(), + functionAndTypeManager.resolveOperator(GREATER_THAN, fromTypes( + BigintType.BIGINT, BigintType.BIGINT)), + BOOLEAN, + ImmutableList.of(new VariableReferenceExpression(Optional.empty(), + "a_bigint", + BigintType.BIGINT), + constant(0L, BigintType.BIGINT))), + call("LIKE", + functionResolution.likeVarcharFunction(), + BOOLEAN, + new VariableReferenceExpression(Optional.empty(), "b_varchar", + VARCHAR), + call(CAST.name(), + functionAndTypeResolver.lookupCast("CAST", VARCHAR, LIKE_PATTERN), + LIKE_PATTERN, + constant(Slices.utf8Slice("b%"), VARCHAR)))); + SpecialFormExpression secondOrExpression = + new SpecialFormExpression(SpecialFormExpression.Form.OR, + BOOLEAN, + call(EQUAL.name(), functionResolution.comparisonFunction(EQUAL, VARCHAR, VARCHAR), BOOLEAN, + call("lower", + functionAndTypeResolver.lookupFunction("lower", fromTypes(VARCHAR)), + VARCHAR, + new VariableReferenceExpression(Optional.empty(), "c.e", + VARCHAR)), + constant(Slices.utf8Slice("hello world"), VARCHAR)), + new SpecialFormExpression(SpecialFormExpression.Form.IS_NULL, + BOOLEAN, + new VariableReferenceExpression(Optional.empty(), "c", VARCHAR))); + SpecialFormExpression andExpression = new SpecialFormExpression(SpecialFormExpression.Form.AND, + BOOLEAN, + firstOrExpression, + secondOrExpression); + ClpExpression clpExpression = + andExpression.accept(new ClpFilterToKqlConverter( 
+ new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), + functionAndTypeManager, + functionAndTypeManager), + null); + Optional definition = clpExpression.getDefinition(); + Optional remainingExpression = clpExpression.getRemainingExpression(); + assertTrue(definition.isPresent()); + assertTrue(remainingExpression.isPresent()); + assertEquals(definition.get(), "((a > 0 OR b: \"b*\"))"); + assertEquals(remainingExpression.get(), new SpecialFormExpression(SpecialFormExpression.Form.OR, + BOOLEAN, + call(EQUAL.name(), functionResolution.comparisonFunction(EQUAL, VARCHAR, VARCHAR), BOOLEAN, + call("lower", + functionAndTypeResolver.lookupFunction("lower", fromTypes(VARCHAR)), + VARCHAR, + new VariableReferenceExpression(Optional.empty(), "c.e", + VARCHAR)), + constant(Slices.utf8Slice("hello world"), VARCHAR)), + new SpecialFormExpression(SpecialFormExpression.Form.IS_NULL, + BOOLEAN, + new VariableReferenceExpression(Optional.empty(), "c", VARCHAR)))); + } +} From 36bddecea2e3a990900264494dfe2bc6880148ce Mon Sep 17 00:00:00 2001 From: wraymo Date: Sun, 28 Jul 2024 00:33:17 -0400 Subject: [PATCH 040/126] Fix bugs --- .../presto/ClpFilterToKqlConverter.java | 24 ++++++++++++------- .../com/yscope/presto/ClpPlanOptimizer.java | 7 +++++- .../presto/TestClpFilterToKqlConverter.java | 15 +++++++++++- .../yscope/presto/TestClpRecordCursor.java | 11 ++++++--- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 9597df48b4223..7666c5c324534 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -16,6 +16,7 @@ import com.facebook.presto.common.function.OperatorType; import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.common.type.VarcharType; +import 
com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.FunctionHandle; import com.facebook.presto.spi.function.FunctionMetadata; @@ -31,6 +32,7 @@ import io.airlift.slice.Slice; import java.util.ArrayList; +import java.util.Map; import java.util.Optional; import java.util.Set; @@ -47,15 +49,18 @@ public class ClpFilterToKqlConverter private final StandardFunctionResolution standardFunctionResolution; private final FunctionMetadataManager functionMetadataManager; private final TypeManager typeManager; + private final Map assignments; public ClpFilterToKqlConverter(StandardFunctionResolution standardFunctionResolution, FunctionMetadataManager functionMetadataManager, - TypeManager typeManager) + TypeManager typeManager, + Map assignments) { this.standardFunctionResolution = requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); this.functionMetadataManager = requireNonNull(functionMetadataManager, "function metadata manager is null"); this.typeManager = requireNonNull(typeManager, "type manager is null"); + this.assignments = requireNonNull(assignments, "assignments is null"); } private static String getLiteralString(ConstantExpression literal) @@ -66,9 +71,9 @@ private static String getLiteralString(ConstantExpression literal) return literal.toString(); } - private static String getVariableName(VariableReferenceExpression variable) + private String getVariableName(VariableReferenceExpression variable) { - String variableName = variable.getName(); + String variableName = ((ClpColumnHandle) assignments.get(variable)).getColumnName(); if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { return variableName.substring(0, variableName.lastIndexOf('_')); @@ -96,16 +101,19 @@ private ClpExpression handleAnd(SpecialFormExpression node) StringBuilder queryBuilder = new 
StringBuilder(); queryBuilder.append("("); ArrayList remainingExpressions = new ArrayList<>(); + boolean hasDefinition = false; for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); - if (expression.getRemainingExpression().isPresent() || !expression.getDefinition().isPresent()) { + if (expression.getDefinition().isPresent()) { + hasDefinition = true; + queryBuilder.append(expression.getDefinition().get()); + queryBuilder.append(" AND "); + } + if (expression.getRemainingExpression().isPresent()) { remainingExpressions.add(expression.getRemainingExpression().get()); - continue; } - queryBuilder.append(expression.getDefinition().get()); - queryBuilder.append(" AND "); } - if (remainingExpressions.size() == node.getArguments().size()) { + if (!hasDefinition) { return new ClpExpression(node); } else if (!remainingExpressions.isEmpty()) { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 2a338d85dee1a..2bdd9731df8cc 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -15,6 +15,7 @@ import com.facebook.airlift.log.Logger; import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; @@ -27,7 +28,9 @@ import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import java.util.Map; import java.util.Optional; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; @@ -76,10 +79,12 @@ public PlanNode visitFilter(FilterNode node, RewriteContext 
context) } TableScanNode tableScanNode = (TableScanNode) node.getSource(); + Map assignments = tableScanNode.getAssignments(); TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); ClpExpression clpExpression = node.getPredicate() - .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, typeManager), null); + .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, typeManager, assignments), + null); Optional kqlQuery = clpExpression.getDefinition(); Optional remainingPredicate = clpExpression.getRemainingExpression(); if (!kqlQuery.isPresent()) { diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java b/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java index 2df12ca6008ae..8ca347435f94e 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java @@ -15,6 +15,7 @@ import com.facebook.presto.common.type.BigintType; import com.facebook.presto.metadata.FunctionAndTypeManager; +import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.SpecialFormExpression; @@ -25,6 +26,7 @@ import io.airlift.slice.Slices; import org.testng.annotations.Test; +import java.util.Map; import java.util.Optional; import static com.facebook.presto.common.function.OperatorType.EQUAL; @@ -88,11 +90,21 @@ public void testSqlToKqlConverter() BOOLEAN, firstOrExpression, secondOrExpression); + Map assignments = Map.of( + new VariableReferenceExpression(Optional.empty(), "a_bigint", BigintType.BIGINT), + new ClpColumnHandle("a_bigint", BigintType.BIGINT, false), + new VariableReferenceExpression(Optional.empty(), "b_varchar", VARCHAR), + new ClpColumnHandle("b_varchar", VARCHAR, 
false), + new VariableReferenceExpression(Optional.empty(), "c.e", VARCHAR), + new ClpColumnHandle("c.e", VARCHAR, false), + new VariableReferenceExpression(Optional.empty(), "c", VARCHAR), + new ClpColumnHandle("c", VARCHAR, false)); ClpExpression clpExpression = andExpression.accept(new ClpFilterToKqlConverter( new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), functionAndTypeManager, - functionAndTypeManager), + functionAndTypeManager, + assignments), null); Optional definition = clpExpression.getDefinition(); Optional remainingExpression = clpExpression.getRemainingExpression(); @@ -103,6 +115,7 @@ public void testSqlToKqlConverter() BOOLEAN, call(EQUAL.name(), functionResolution.comparisonFunction(EQUAL, VARCHAR, VARCHAR), BOOLEAN, call("lower", + functionAndTypeResolver.lookupFunction("lower", fromTypes(VARCHAR)), VARCHAR, new VariableReferenceExpression(Optional.empty(), "c.e", diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 247e97ab55686..0a9c74683640e 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -15,6 +15,7 @@ import com.facebook.presto.common.type.BigintType; import com.facebook.presto.metadata.FunctionAndTypeManager; +import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; import com.facebook.presto.sql.relational.FunctionResolution; @@ -25,6 +26,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; import static com.facebook.presto.common.function.OperatorType.EQUAL; @@ -128,10 +130,13 @@ public void testPredicate() "a_bigint", BigintType.BIGINT), constant(1L, BigintType.BIGINT))); + Map assignments = Map.of( + new 
VariableReferenceExpression(Optional.empty(), "a_bigint", BigintType.BIGINT), + new ClpColumnHandle("a_bigint", BigintType.BIGINT, false)); Optional query = - callExpression.accept(new ClpFilterToKqlConverter(new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), - functionAndTypeManager, - functionAndTypeManager), null).getDefinition(); + callExpression.accept(new ClpFilterToKqlConverter( + new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), + functionAndTypeManager, functionAndTypeManager, assignments), null).getDefinition(); assertTrue(query.isPresent()); ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( From 9bede0dfce474ec0b8e240e34a825bd8c1620b0c Mon Sep 17 00:00:00 2001 From: wraymo Date: Sun, 28 Jul 2024 19:42:32 -0400 Subject: [PATCH 041/126] make it more robust for logical binaries --- .../presto/ClpFilterToKqlConverter.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 7666c5c324534..2360485910b13 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -14,6 +14,7 @@ package com.yscope.presto; import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ColumnHandle; @@ -226,11 +227,17 @@ private ClpExpression handleLogicalBinary(String operator, CallExpression node) return new ClpExpression(node); } - String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); - ConstantExpression literal = 
(ConstantExpression) node.getArguments().get(1); - String literalString = getLiteralString(literal); + ClpExpression leftExpression = node.getArguments().get(0).accept(this, null); + ClpExpression rightExpression = node.getArguments().get(1).accept(this, null); + if (!leftExpression.getDefinition().isPresent() || !rightExpression.getDefinition().isPresent()) { + return new ClpExpression(node); + } + + String variableName = leftExpression.getDefinition().get(); + String literalString = rightExpression.getDefinition().get(); + Type literalType = node.getArguments().get(1).getType(); if (operator.equals("=")) { - if (literal.getType().equals(VarcharType.VARCHAR)) { + if (literalType.equals(VarcharType.VARCHAR)) { return new ClpExpression(variableName + ": \"" + literalString + "\""); } else { @@ -238,14 +245,14 @@ private ClpExpression handleLogicalBinary(String operator, CallExpression node) } } else if (operator.equals("<>")) { - if (literal.getType().equals(VarcharType.VARCHAR)) { + if (literalType.equals(VarcharType.VARCHAR)) { return new ClpExpression("NOT " + variableName + ": \"" + literalString + "\""); } else { return new ClpExpression("NOT " + variableName + ": " + literalString); } } - else if (LOGICAL_BINARY_OPS_FILTER.contains(operator)) { + else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !literalType.equals(VarcharType.VARCHAR)) { return new ClpExpression(variableName + " " + operator + " " + literalString); } else { From 519fa7f651efc07a8001c758c6f297221e3ce268 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 16 Aug 2024 14:31:35 -0400 Subject: [PATCH 042/126] add projection support --- .../java/com/yscope/presto/ClpClient.java | 20 +++++++++++-------- .../yscope/presto/ClpRecordSetProvider.java | 8 +++++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 6cb0f79ffbf48..d64e4a7f9d8f9 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -224,14 +224,14 @@ public Set listColumns(String tableName) return polymorphicColumnHandles; } - public BufferedReader getRecords(String tableName, Optional query) + public BufferedReader getRecords(String tableName, Optional query, List columns) { if (!listTables().contains(tableName)) { return null; } if (query.isPresent()) { - return searchTable(tableName, query.get()); + return searchTable(tableName, query.get(), columns); } else { Path decompressFile = decompressDir.resolve(tableName).resolve("original"); @@ -252,15 +252,19 @@ public BufferedReader getRecords(String tableName, Optional query) } } - private BufferedReader searchTable(String tableName, String query) + private BufferedReader searchTable(String tableName, String query, List columns) { Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); try { - ProcessBuilder processBuilder = - new ProcessBuilder(executablePath.toString(), - "s", - tableArchiveDir.toString(), - query); + List argumentList = new ArrayList<>(); + argumentList.add(executablePath.toString()); + argumentList.add("s"); + argumentList.add(tableArchiveDir.toString()); + argumentList.add(query); + argumentList.add("--projection"); + argumentList.addAll(columns); + log.info("Argument list: %s", argumentList.toString()); + ProcessBuilder processBuilder = new ProcessBuilder(argumentList); Process process = processBuilder.start(); return new BufferedReader(new InputStreamReader(process.getInputStream())); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index dde555784d8f3..c66df442b9a17 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -47,8 +47,10 @@ public RecordSet 
getRecordSet(ConnectorTransactionHandle transactionHandle, for (ColumnHandle handle : columns) { handles.add((ClpColumnHandle) handle); } - return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), clpSplit.getQuery()), - clpClient.getConfig().isPolymorphicTypeEnabled(), - handles.build()); + ImmutableList clpColumnHandles = handles.build(); + return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), + clpSplit.getQuery(), + clpColumnHandles.stream().map(ClpColumnHandle::getColumnName).collect(ImmutableList.toImmutableList())), + clpClient.getConfig().isPolymorphicTypeEnabled(), clpColumnHandles); } } From eeedf2d4b05549e74cf21abb364290960b34a712 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 16 Aug 2024 14:36:49 -0400 Subject: [PATCH 043/126] add clp module --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index c550daba47db2..772f4290b8cb6 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit c550daba47db2a17221c8ec0144450f32bd9a56a +Subproject commit 772f4290b8cb60262751a0143474ee2e6abf17b4 From 2122ec50eda46584d516d26323b123c62ea10915 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 16 Aug 2024 15:00:59 -0400 Subject: [PATCH 044/126] update submodules --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 6fb925ff13ecf..67eef0a186202 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "presto-native-execution/velox"] path = presto-native-execution/velox - url = https://github.com/facebookincubator/velox.git + url = https://github.com/wraymo/velox.git From 6e0c386f248558cf8e4ddc369f42aa00ea84d14a Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 16 Aug 2024 15:49:15 -0400 Subject: [PATCH 045/126] update submodules --- .gitmodules | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/.gitmodules b/.gitmodules index 67eef0a186202..25faf6506b549 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,4 @@ [submodule "presto-native-execution/velox"] path = presto-native-execution/velox - url = https://github.com/wraymo/velox.git + url = https://github.com/wraymo/velox.git + branch = clp_integration From 9788a01f4239138296ba5500f3a9e6905aee5875 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 19 Aug 2024 15:18:21 -0400 Subject: [PATCH 046/126] add clp worker support --- .../main/types/PrestoToVeloxConnector.h | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h index eb33dfb54ca1d..e01ce0e43b7f0 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h @@ -214,4 +214,30 @@ class TpchPrestoToVeloxConnector final : public PrestoToVeloxConnector { std::unique_ptr createConnectorProtocol() const final; }; + +class ClpPrestoToVeloxConnector final : public PrestoToVeloxConnector { + public: + explicit ClpPrestoToVeloxConnector(std::string connectorName) + : PrestoToVeloxConnector(std::move(connectorName)) {} + + std::unique_ptr toVeloxSplit( + const protocol::ConnectorId& catalogId, + const protocol::ConnectorSplit* connectorSplit) const final; + + std::unique_ptr toVeloxColumnHandle( + const protocol::ColumnHandle* column, + const TypeParser& typeParser) const final; + + std::unique_ptr toVeloxTableHandle( + const protocol::TableHandle& tableHandle, + const VeloxExprConverter& exprConverter, + const TypeParser& typeParser, + std::unordered_map< + std::string, + std::shared_ptr>& assignments) + const final; + + std::unique_ptr createConnectorProtocol() + const final; +}; } // namespace facebook::presto From cd076f25a320279ee2a5a85aa0ed5ef24ca74d94 Mon Sep 17 00:00:00 2001 From: wraymo Date: 
Tue, 20 Aug 2024 15:56:43 -0400 Subject: [PATCH 047/126] add more methods --- .../main/types/PrestoToVeloxConnector.cpp | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index c525f88e35300..b0279ad3311c1 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -18,6 +18,10 @@ #include "presto_cpp/presto_protocol/connector/tpch/TpchConnectorProtocol.h" #include +#include "velox/connectors/clp/ClpColumnHandle.h> +#include "velox/connectors/clp/ClpConnector.h" +#include "velox/connectors/clp/ClpConnectorSplit.h" +#include "velox/connectors/clp/ClpTableHandle.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/connectors/hive/HiveDataSink.h" @@ -1552,4 +1556,52 @@ std::unique_ptr TpchPrestoToVeloxConnector::createConnectorProtocol() const { return std::make_unique(); } + +std::unique_ptr +ClpPrestoToVeloxConnector::toVeloxSplit( + const protocol::ConnectorId& catalogId, + const protocol::ConnectorSplit* connectorSplit) const { + auto clpSplit = dynamic_cast(connectorSplit); + VELOX_CHECK_NOT_NULL( + clpSplit, "Unexpected split type {}", connectorSplit->_type); + return std::make_unique( + catalogId, clpSplit->schemaName, clpSplit->tableName, clpSplit->query); +} + +std::unique_ptr +ClpPrestoToVeloxConnector::toVeloxColumnHandle( + const protocol::ColumnHandle* column, + const TypeParser& typeParser) const { + auto clpColumn = dynamic_cast(column); + VELOX_CHECK_NOT_NULL( + clpColumn, "Unexpected column handle type {}", column->_type); + // TODO(Ray): need to write a parser for the type + return std::make_unique( + clpColumn->columnName, clpColumn->columnType, clpColumn->nullable); +} + +std::unique_ptr 
+ClpPrestoToVeloxConnector::toVeloxTableHandle( + const protocol::TableHandle& tableHandle, + const VeloxExprConverter& exprConverter, + const TypeParser& typeParser, + std::unordered_map< + std::string, + std::shared_ptr>& assignments) const { + auto clpLayout = + std::dynamic_pointer_cast( + tableHandle.connectorTableLayout); + VELOX_CHECK_NOT_NULL( + clpLayout, + "Unexpected layout type {}", + tableHandle.connectorTableLayout->_type); + return std::make_unique( + tableHandle.connectorId, clpLayout->table.tableName); +} + +std::unique_ptr +ClpPrestoToVeloxConnector::createConnectorProtocol() const { + return std::make_unique(); +} + } // namespace facebook::presto From 2342ed506059aac32833bd481add2276dc6a4c9a Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 20 Aug 2024 21:34:52 -0400 Subject: [PATCH 048/126] add more methods --- presto-native-execution/presto_cpp/main/PrestoServer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 0473640632edf..f115a20f17fc6 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -271,6 +271,8 @@ void PrestoServer::run() { std::make_unique("iceberg")); registerPrestoToVeloxConnector( std::make_unique("tpch")); + registerPrestoToVeloxConnector( + std::make_unique("clp")); // Presto server uses system catalog or system schema in other catalogs // in different places in the code. All these resolve to the SystemConnector. 
// Depending on where the operator or column is used, different prefixes can From da6795307f5d2ab810d2756002e099a1bf73f51f Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 21 Aug 2024 15:20:30 -0400 Subject: [PATCH 049/126] modify CMakeLists.txt --- presto-native-execution/presto_cpp/main/CMakeLists.txt | 2 +- presto-native-execution/presto_cpp/main/tests/CMakeLists.txt | 1 + .../presto_cpp/main/types/tests/CMakeLists.txt | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/CMakeLists.txt b/presto-native-execution/presto_cpp/main/CMakeLists.txt index c06e00edf834c..aa6f4e5f3d98d 100644 --- a/presto-native-execution/presto_cpp/main/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/CMakeLists.txt @@ -105,7 +105,7 @@ add_executable(presto_server PrestoMain.cpp) # "undefined reference to `vtable for velox::connector::tpch::TpchTableHandle`" # TODO: Fix these errors. target_link_libraries(presto_server presto_server_lib velox_hive_connector - velox_tpch_connector) + velox_tpch_connector velox_clp_connector) # Clang requires explicit linking with libatomic. 
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" diff --git a/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt index 1643ca5e17bab..334efb6f8b4b8 100644 --- a/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt @@ -44,6 +44,7 @@ target_link_libraries( $ velox_hive_connector velox_tpch_connector + velox_clp_connector velox_presto_serializer velox_functions_prestosql velox_aggregates diff --git a/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt index 28f73aff40b80..953cb9376fe0e 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt @@ -26,6 +26,7 @@ target_link_libraries( velox_dwio_orc_reader velox_hive_connector velox_tpch_connector + velox_clp_connector velox_exec velox_dwio_common_exception presto_type_converter @@ -62,6 +63,7 @@ target_link_libraries( velox_functions_lib velox_hive_connector velox_tpch_connector + velox_clp_connector velox_hive_partition_function velox_presto_serializer velox_serialization @@ -93,6 +95,7 @@ target_link_libraries( velox_dwio_common velox_hive_connector velox_tpch_connector + velox_clp_connector GTest::gtest GTest::gtest_main) From fde90b31ec21a9e407a9d383c18c7ab0e662949f Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 23 Aug 2024 20:33:16 -0400 Subject: [PATCH 050/126] add clp data source support --- .../main/types/PrestoToVeloxConnector.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index b0279ad3311c1..ab1f00138880f 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ 
b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -18,7 +18,7 @@ #include "presto_cpp/presto_protocol/connector/tpch/TpchConnectorProtocol.h" #include -#include "velox/connectors/clp/ClpColumnHandle.h> +#include "velox/connectors/clp/ClpColumnHandle.h" #include "velox/connectors/clp/ClpConnector.h" #include "velox/connectors/clp/ClpConnectorSplit.h" #include "velox/connectors/clp/ClpTableHandle.h" @@ -1565,7 +1565,7 @@ ClpPrestoToVeloxConnector::toVeloxSplit( VELOX_CHECK_NOT_NULL( clpSplit, "Unexpected split type {}", connectorSplit->_type); return std::make_unique( - catalogId, clpSplit->schemaName, clpSplit->tableName, clpSplit->query); + catalogId, clpSplit->schemaName, clpSplit->tableName); } std::unique_ptr @@ -1575,9 +1575,10 @@ ClpPrestoToVeloxConnector::toVeloxColumnHandle( auto clpColumn = dynamic_cast(column); VELOX_CHECK_NOT_NULL( clpColumn, "Unexpected column handle type {}", column->_type); - // TODO(Ray): need to write a parser for the type - return std::make_unique( - clpColumn->columnName, clpColumn->columnType, clpColumn->nullable); + return std::make_unique( + clpColumn->columnName, + typeParser.parse(clpColumn->columnType), + clpColumn->nullable); } std::unique_ptr @@ -1596,7 +1597,7 @@ ClpPrestoToVeloxConnector::toVeloxTableHandle( "Unexpected layout type {}", tableHandle.connectorTableLayout->_type); return std::make_unique( - tableHandle.connectorId, clpLayout->table.tableName); + tableHandle.connectorId, clpLayout->table.tableName, clpLayout->query); } std::unique_ptr From 04e8afdad7d1fe74c2fb2c12c4572bdd534d71fd Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 28 Aug 2024 11:12:41 -0400 Subject: [PATCH 051/126] remove duplicate values and add null support --- .../presto_cpp/main/types/PrestoToVeloxConnector.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index 
ab1f00138880f..8854701c6a606 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -19,7 +19,6 @@ #include #include "velox/connectors/clp/ClpColumnHandle.h" -#include "velox/connectors/clp/ClpConnector.h" #include "velox/connectors/clp/ClpConnectorSplit.h" #include "velox/connectors/clp/ClpTableHandle.h" #include "velox/connectors/hive/HiveConnector.h" From 4419bb1c67524237bf21ab8c77017fc68d2cd1b0 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 4 Sep 2024 21:42:15 +0000 Subject: [PATCH 052/126] add some configuration examples --- .gitmodules | 2 +- .../etc/catalog/hive.properties | 6 +++++- presto-native-execution/etc/config.properties | 8 ++++---- presto-native-execution/etc/node.properties | 4 +++- .../etc_coordinator/catalog/clp.properties | 4 ++++ .../etc_coordinator/catalog/hive.properties | 4 ++++ .../etc_coordinator/catalog/postgresql.properties | 3 +++ .../etc_coordinator/config.properties | 15 +++++++++++++++ .../etc_coordinator/jvm.config | 10 ++++++++++ .../etc_coordinator/log.properties | 1 + .../etc_coordinator/node.properties | 3 +++ .../etc_worker/catalog/clp.properties | 4 ++++ .../etc_worker/catalog/hive.properties | 4 ++++ .../etc_worker/catalog/postgresql.properties | 3 +++ .../etc_worker/config.properties | 6 ++++++ presto-native-execution/etc_worker/jvm.config | 10 ++++++++++ presto-native-execution/etc_worker/log.properties | 1 + .../etc_worker/node.properties | 3 +++ .../presto_cpp/main/CMakeLists.txt | 1 + .../presto_cpp/main/tests/CMakeLists.txt | 1 + 20 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 presto-native-execution/etc_coordinator/catalog/clp.properties create mode 100644 presto-native-execution/etc_coordinator/catalog/hive.properties create mode 100644 presto-native-execution/etc_coordinator/catalog/postgresql.properties create mode 100644 presto-native-execution/etc_coordinator/config.properties 
create mode 100644 presto-native-execution/etc_coordinator/jvm.config create mode 100644 presto-native-execution/etc_coordinator/log.properties create mode 100644 presto-native-execution/etc_coordinator/node.properties create mode 100644 presto-native-execution/etc_worker/catalog/clp.properties create mode 100644 presto-native-execution/etc_worker/catalog/hive.properties create mode 100644 presto-native-execution/etc_worker/catalog/postgresql.properties create mode 100644 presto-native-execution/etc_worker/config.properties create mode 100644 presto-native-execution/etc_worker/jvm.config create mode 100644 presto-native-execution/etc_worker/log.properties create mode 100644 presto-native-execution/etc_worker/node.properties diff --git a/.gitmodules b/.gitmodules index 25faf6506b549..bb184984a4368 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "presto-native-execution/velox"] - path = presto-native-execution/velox + path = presto-native-execution/velox url = https://github.com/wraymo/velox.git branch = clp_integration diff --git a/presto-native-execution/etc/catalog/hive.properties b/presto-native-execution/etc/catalog/hive.properties index 466b7e664e44f..ee8abe93af853 100644 --- a/presto-native-execution/etc/catalog/hive.properties +++ b/presto-native-execution/etc/catalog/hive.properties @@ -1 +1,5 @@ -connector.name=hive +connector.name=hive-hadoop2 +hive.metastore=file +hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog +hive.parquet.use-column-names=true +file-column-names-read-as-lower-case=true diff --git a/presto-native-execution/etc/config.properties b/presto-native-execution/etc/config.properties index b29e663f30ebf..9a9c35d0cdc7a 100644 --- a/presto-native-execution/etc/config.properties +++ b/presto-native-execution/etc/config.properties @@ -1,6 +1,6 @@ -discovery.uri=http://127.0.0.1:58215 -presto.version=testversion +discovery.uri=http://127.0.0.1:8080 +presto.version=0.289-SNAPSHOT-c8a1099 
http-server.http.port=7777 shutdown-onset-sec=1 -register-test-functions=true -runtime-metrics-collection-enabled=true +register-test-functions=false +runtime-metrics-collection-enabled=false diff --git a/presto-native-execution/etc/node.properties b/presto-native-execution/etc/node.properties index 1d92b7ace8087..180bdb2e68e0a 100644 --- a/presto-native-execution/etc/node.properties +++ b/presto-native-execution/etc/node.properties @@ -1,3 +1,5 @@ -node.environment=testing +node.environment=production node.internal-address=127.0.0.1 node.location=testing-location +node.id=worker +node.data-dir=/root/presto/presto-native-execution/data_velox_worker \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/catalog/clp.properties b/presto-native-execution/etc_coordinator/catalog/clp.properties new file mode 100644 index 0000000000000..0feade69e2944 --- /dev/null +++ b/presto-native-execution/etc_coordinator/catalog/clp.properties @@ -0,0 +1,4 @@ +connector.name=clp +executable-path=/root/clp/components/core/build/clp-s +archive-dir=/root/presto/presto-native-execution/clp_archive +polymorphic-type-enabled=true \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/catalog/hive.properties b/presto-native-execution/etc_coordinator/catalog/hive.properties new file mode 100644 index 0000000000000..9109cb18ef86c --- /dev/null +++ b/presto-native-execution/etc_coordinator/catalog/hive.properties @@ -0,0 +1,4 @@ +connector.name=hive-hadoop2 +hive.metastore=file +hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog +hive.parquet.use-column-names=true diff --git a/presto-native-execution/etc_coordinator/catalog/postgresql.properties b/presto-native-execution/etc_coordinator/catalog/postgresql.properties new file mode 100644 index 0000000000000..ca28b9ceb038e --- /dev/null +++ b/presto-native-execution/etc_coordinator/catalog/postgresql.properties @@ -0,0 +1,3 @@ +connector.name=postgresql 
+connection-url=jdbc:postgresql://localhost:5432/mydb +connection-user=root diff --git a/presto-native-execution/etc_coordinator/config.properties b/presto-native-execution/etc_coordinator/config.properties new file mode 100644 index 0000000000000..994b2cd3503f3 --- /dev/null +++ b/presto-native-execution/etc_coordinator/config.properties @@ -0,0 +1,15 @@ +coordinator=true +node-scheduler.include-coordinator=false +http-server.http.port=8080 +query.max-memory=4GB +query.max-memory-per-node=4GB +discovery-server.enabled=true +discovery.uri=http://localhost:8080 +task.max-worker-threads=1 +task.concurrency=1 +experimental.internal-communication.thrift-transport-enabled=true +optimizer.optimize-hash-generation=false +regex-library=RE2J +use-alternative-function-signatures=true +inline-sql-functions=false +nested-data-serialization-enabled=false \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/jvm.config b/presto-native-execution/etc_coordinator/jvm.config new file mode 100644 index 0000000000000..a943e2f2cd1d9 --- /dev/null +++ b/presto-native-execution/etc_coordinator/jvm.config @@ -0,0 +1,10 @@ +-server +-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5006 +-Xmx16G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:+ExitOnOutOfMemoryError +-Djdk.attach.allowAttachSelf=true diff --git a/presto-native-execution/etc_coordinator/log.properties b/presto-native-execution/etc_coordinator/log.properties new file mode 100644 index 0000000000000..3abc29ce3d86a --- /dev/null +++ b/presto-native-execution/etc_coordinator/log.properties @@ -0,0 +1 @@ +com.facebook.presto=INFO \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/node.properties b/presto-native-execution/etc_coordinator/node.properties new file mode 100644 index 0000000000000..977d0f74a5a14 --- /dev/null +++ 
b/presto-native-execution/etc_coordinator/node.properties @@ -0,0 +1,3 @@ +node.environment=production +node.id=coordinator +node.data-dir=/root/presto/presto-native-execution/data_coordinator \ No newline at end of file diff --git a/presto-native-execution/etc_worker/catalog/clp.properties b/presto-native-execution/etc_worker/catalog/clp.properties new file mode 100644 index 0000000000000..0feade69e2944 --- /dev/null +++ b/presto-native-execution/etc_worker/catalog/clp.properties @@ -0,0 +1,4 @@ +connector.name=clp +executable-path=/root/clp/components/core/build/clp-s +archive-dir=/root/presto/presto-native-execution/clp_archive +polymorphic-type-enabled=true \ No newline at end of file diff --git a/presto-native-execution/etc_worker/catalog/hive.properties b/presto-native-execution/etc_worker/catalog/hive.properties new file mode 100644 index 0000000000000..9109cb18ef86c --- /dev/null +++ b/presto-native-execution/etc_worker/catalog/hive.properties @@ -0,0 +1,4 @@ +connector.name=hive-hadoop2 +hive.metastore=file +hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog +hive.parquet.use-column-names=true diff --git a/presto-native-execution/etc_worker/catalog/postgresql.properties b/presto-native-execution/etc_worker/catalog/postgresql.properties new file mode 100644 index 0000000000000..538a221ee015b --- /dev/null +++ b/presto-native-execution/etc_worker/catalog/postgresql.properties @@ -0,0 +1,3 @@ +connector.name=postgresql +connection-url=jdbc:postgresql://localhost:5432/mydb +connection-user=raymo diff --git a/presto-native-execution/etc_worker/config.properties b/presto-native-execution/etc_worker/config.properties new file mode 100644 index 0000000000000..3a5190ddbd859 --- /dev/null +++ b/presto-native-execution/etc_worker/config.properties @@ -0,0 +1,6 @@ +coordinator=false +http-server.http.port=7777 +query.max-memory=4GB +query.max-memory-per-node=4GB +discovery.uri=http://localhost:8080 +regex-library=RE2J diff --git 
a/presto-native-execution/etc_worker/jvm.config b/presto-native-execution/etc_worker/jvm.config new file mode 100644 index 0000000000000..75403dd8a1a38 --- /dev/null +++ b/presto-native-execution/etc_worker/jvm.config @@ -0,0 +1,10 @@ +-server +-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5007 +-Xmx16G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:+ExitOnOutOfMemoryError +-Djdk.attach.allowAttachSelf=true diff --git a/presto-native-execution/etc_worker/log.properties b/presto-native-execution/etc_worker/log.properties new file mode 100644 index 0000000000000..3abc29ce3d86a --- /dev/null +++ b/presto-native-execution/etc_worker/log.properties @@ -0,0 +1 @@ +com.facebook.presto=INFO \ No newline at end of file diff --git a/presto-native-execution/etc_worker/node.properties b/presto-native-execution/etc_worker/node.properties new file mode 100644 index 0000000000000..6a2e3ec052a6c --- /dev/null +++ b/presto-native-execution/etc_worker/node.properties @@ -0,0 +1,3 @@ +node.environment=production +node.id=worker +node.data-dir=/root/presto/presto-native-execution/data_worker \ No newline at end of file diff --git a/presto-native-execution/presto_cpp/main/CMakeLists.txt b/presto-native-execution/presto_cpp/main/CMakeLists.txt index aa6f4e5f3d98d..4db26887f9785 100644 --- a/presto-native-execution/presto_cpp/main/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/CMakeLists.txt @@ -99,6 +99,7 @@ set_property(TARGET presto_server_lib PROPERTY JOB_POOL_LINK presto_link_job_pool) add_executable(presto_server PrestoMain.cpp) +target_link_options(presto_server PRIVATE "-no-pie") # Moving velox_hive_connector and velox_tpch_connector to presto_server_lib # results in multiple link errors similar to the one below only on GCC. 
diff --git a/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt index 334efb6f8b4b8..c599af9e9b558 100644 --- a/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt @@ -23,6 +23,7 @@ add_executable( SessionPropertiesTest.cpp TaskManagerTest.cpp QueryContextManagerTest.cpp) +target_link_options(presto_server_test PRIVATE "-no-pie") if(DEFINED PRESTO_MEMORY_CHECKER_TYPE AND PRESTO_MEMORY_CHECKER_TYPE STREQUAL "LINUX_MEMORY_CHECKER") From 38a40491cbe368145b73974dc3f42c7307de9fbb Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 12 Sep 2024 10:22:50 -0400 Subject: [PATCH 053/126] add CLP Split support --- .../java/com/yscope/presto/ClpClient.java | 71 ++++++++++++++----- .../com/yscope/presto/ClpColumnHandle.java | 14 ++++ .../yscope/presto/ClpRecordSetProvider.java | 3 +- .../main/java/com/yscope/presto/ClpSplit.java | 9 +++ .../com/yscope/presto/ClpSplitManager.java | 14 ++-- .../com/yscope/presto/TestClpMetadata.java | 2 +- .../yscope/presto/TestClpRecordCursor.java | 13 ++-- 7 files changed, 95 insertions(+), 31 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index d64e4a7f9d8f9..1523b02c22693 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -21,6 +21,7 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.github.luben.zstd.ZstdInputStream; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.yscope.presto.schema.SchemaNode; import com.yscope.presto.schema.SchemaTree; @@ -60,6 +61,7 @@ public class ClpClient private final ClpConfig config; private final Path executablePath; private final Map> tableNameToColumnHandles; + private 
final Map> tableNameToArchiveIds; private final Path decompressDir; private Set tableNames; @@ -68,6 +70,7 @@ public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); this.tableNameToColumnHandles = new HashMap<>(); + this.tableNameToArchiveIds = new HashMap<>(); this.executablePath = getExecutablePath(); this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); } @@ -183,6 +186,32 @@ public Set listTables() return this.tableNames; } + public List listArchiveIds(String tableName) + { + if (tableNameToArchiveIds.containsKey(tableName)) { + return tableNameToArchiveIds.get(tableName); + } + Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); + if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { + return ImmutableList.of(); + } + + try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { + ImmutableList.Builder archiveIds = ImmutableList.builder(); + for (Path path : stream) { + if (Files.isDirectory(path)) { + archiveIds.add(path.getFileName().toString()); + } + } + List archiveIdsList = archiveIds.build(); + tableNameToArchiveIds.put(tableName, archiveIdsList); + return archiveIdsList; + } + catch (Exception e) { + return ImmutableList.of(); + } + } + public Set listColumns(String tableName) { if (tableNameToColumnHandles.containsKey(tableName)) { @@ -224,35 +253,38 @@ public Set listColumns(String tableName) return polymorphicColumnHandles; } - public BufferedReader getRecords(String tableName, Optional query, List columns) + public BufferedReader getRecords(String tableName, String archiveId, Optional query, List columns) { if (!listTables().contains(tableName)) { return null; } if (query.isPresent()) { - return searchTable(tableName, query.get(), columns); + return searchTable(tableName, archiveId, query.get(), columns); } else { - Path decompressFile = decompressDir.resolve(tableName).resolve("original"); - if (!Files.exists(decompressFile) || 
!Files.isRegularFile(decompressFile)) { - if (!decompressRecords(tableName)) { - return null; - } - log.info("Decompress records to %s", decompressFile.toString()); - } - - try { - return Files.newBufferedReader(decompressFile); - } - catch (IOException e) { - log.error(e, "Failed to get records for table %s", tableName); - return null; - } + return searchTable(tableName, archiveId, "*", columns); } +// else { +// Path decompressFile = decompressDir.resolve(tableName).resolve("original"); +// if (!Files.exists(decompressFile) || !Files.isRegularFile(decompressFile)) { +// if (!decompressRecords(tableName)) { +// return null; +// } +// log.info("Decompress records to %s", decompressFile.toString()); +// } +// +// try { +// return Files.newBufferedReader(decompressFile); +// } +// catch (IOException e) { +// log.error(e, "Failed to get records for table %s", tableName); +// return null; +// } +// } } - private BufferedReader searchTable(String tableName, String query, List columns) + private BufferedReader searchTable(String tableName, String archiveId, String query, List columns) { Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); try { @@ -260,6 +292,8 @@ private BufferedReader searchTable(String tableName, String query, List argumentList.add(executablePath.toString()); argumentList.add("s"); argumentList.add(tableArchiveDir.toString()); + argumentList.add("--archive-id"); + argumentList.add(archiveId); argumentList.add(query); argumentList.add("--projection"); argumentList.addAll(columns); @@ -371,6 +405,7 @@ private Set handlePolymorphicType(Set columnHa for (ClpColumnHandle columnHandle : columnHandleList) { polymorphicColumnHandles.add(new ClpColumnHandle( columnHandle.getColumnName() + "_" + columnHandle.getColumnType().getDisplayName(), + columnHandle.getColumnName(), columnHandle.getColumnType(), columnHandle.isNullable())); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java index 177506cfd1e52..04c3fa0e5fc45 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java @@ -27,26 +27,40 @@ public class ClpColumnHandle implements ColumnHandle { private final String columnName; + private final String originalColumnName; private final Type columnType; private final boolean nullable; @JsonCreator public ClpColumnHandle( @JsonProperty("columnName") String columnName, + @JsonProperty("columnType") String originalColumnName, @JsonProperty("columnType") Type columnType, @JsonProperty("nullable") boolean nullable) { this.columnName = columnName; + this.originalColumnName = originalColumnName; this.columnType = columnType; this.nullable = nullable; } + public ClpColumnHandle(String columnName, Type columnType, boolean nullable) + { + this(columnName, columnName, columnType, nullable); + } + @JsonProperty public String getColumnName() { return columnName; } + @JsonProperty + public String getOriginalColumnName() + { + return originalColumnName; + } + @JsonProperty public Type getColumnType() { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index c66df442b9a17..1e9bffb9863a8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -49,8 +49,9 @@ public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, } ImmutableList clpColumnHandles = handles.build(); return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), + clpSplit.getArchiveId(), clpSplit.getQuery(), - clpColumnHandles.stream().map(ClpColumnHandle::getColumnName).collect(ImmutableList.toImmutableList())), + 
clpColumnHandles.stream().map(ClpColumnHandle::getOriginalColumnName).collect(ImmutableList.toImmutableList())), clpClient.getConfig().isPolymorphicTypeEnabled(), clpColumnHandles); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index 5b9778c8caf63..f13fbb80bae20 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -33,15 +33,18 @@ public class ClpSplit { private final String schemaName; private final String tableName; + private final String archiveId; private final Optional query; @JsonCreator public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, @JsonProperty("tableName") @Nullable String tableName, + @JsonProperty("archiveId") @Nullable String archiveId, @JsonProperty("query") Optional query) { this.schemaName = schemaName; this.tableName = tableName; + this.archiveId = archiveId; this.query = query; } @@ -58,6 +61,12 @@ public String getTableName() return tableName; } + @JsonProperty + public String getArchiveId() + { + return archiveId; + } + @JsonProperty public Optional getQuery() { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index dc8fbcc25ed15..4d070be73b7f4 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -14,7 +14,6 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorSession; -import com.facebook.presto.spi.ConnectorSplit; import com.facebook.presto.spi.ConnectorSplitSource; import com.facebook.presto.spi.ConnectorTableLayoutHandle; import com.facebook.presto.spi.FixedSplitSource; @@ -23,8 +22,7 @@ import javax.inject.Inject; -import java.util.Collections; -import java.util.List; +import java.util.stream.Collectors; public class ClpSplitManager implements 
ConnectorSplitManager @@ -48,9 +46,13 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand if (!clpClient.listTables().contains(tableHandle.getTableName())) { throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); } - List splits = - Collections.singletonList(new ClpSplit("default", tableHandle.getTableName(), layoutHandle.getQuery())); - return new FixedSplitSource(splits); + return new FixedSplitSource(clpClient.listArchiveIds(tableHandle.getTableName()) + .stream() + .map(archiveId -> new ClpSplit("default", + tableHandle.getTableName(), + archiveId, + layoutHandle.getQuery())) + .collect(Collectors.toList())); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index da955c500d8d0..fa79d7ebcc733 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -40,7 +40,7 @@ public void setUp() { ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive") .setPolymorphicTypeEnabled(true) - .setClpExecutablePath("/usr/local/bin/clp-s"); + .setClpExecutablePath("/usr/local/bin/clp-s-projection"); metadata = new ClpMetadata(new ClpClient(config)); } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java index 0a9c74683640e..8ce964e32533a 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java @@ -50,7 +50,7 @@ public void setUp() { ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive") .setPolymorphicTypeEnabled(true) - .setClpExecutablePath("/usr/local/bin/clp-s"); + .setClpExecutablePath("/usr/local/bin/clp-s-projection"); clpClient = new ClpClient(config); clpClient.start(); } 
@@ -66,10 +66,11 @@ public void assertNull(ClpRecordCursor cursor, List indices) public void testTable1RecordCursor() { ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); + List archiveIds = clpClient.listArchiveIds("test_1_table"); ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_1_table", Optional.empty()), + new ClpSplit("default", "test_1_table", archiveIds.get(0), Optional.empty()), new ArrayList<>(clpClient.listColumns("test_1_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); @@ -102,10 +103,11 @@ public void testTable1RecordCursor() public void testTable2RecordCursor() { ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); + List archiveIds = clpClient.listArchiveIds("test_2_table"); ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_2_table", Optional.empty()), + new ClpSplit("default", "test_2_table", archiveIds.get(0), Optional.empty()), new ArrayList<>(clpClient.listColumns("test_2_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); @@ -132,17 +134,18 @@ public void testPredicate() constant(1L, BigintType.BIGINT))); Map assignments = Map.of( new VariableReferenceExpression(Optional.empty(), "a_bigint", BigintType.BIGINT), - new ClpColumnHandle("a_bigint", BigintType.BIGINT, false)); + new ClpColumnHandle("a_bigint", "a", BigintType.BIGINT, false)); Optional query = callExpression.accept(new ClpFilterToKqlConverter( new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), functionAndTypeManager, functionAndTypeManager, assignments), null).getDefinition(); assertTrue(query.isPresent()); ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); 
+ List archiveIds = clpClient.listArchiveIds("test_1_table"); ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( ClpTransactionHandle.INSTANCE, SESSION, - new ClpSplit("default", "test_1_table", query), + new ClpSplit("default", "test_1_table", archiveIds.get(0), query), new ArrayList<>(clpClient.listColumns("test_1_table"))); assertNotNull(recordSet, "recordSet is null"); ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); From d99aefe0386f311704ed76eb06502e2b5674d6a0 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 13 Sep 2024 22:06:53 -0400 Subject: [PATCH 054/126] fix a bug --- presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java index 04c3fa0e5fc45..4de90d6b7e059 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java @@ -34,7 +34,7 @@ public class ClpColumnHandle @JsonCreator public ClpColumnHandle( @JsonProperty("columnName") String columnName, - @JsonProperty("columnType") String originalColumnName, + @JsonProperty("originalColumnName") String originalColumnName, @JsonProperty("columnType") Type columnType, @JsonProperty("nullable") boolean nullable) { From 9dad3423010e8eb5c62cabe2a6a40ccbf1dec6b1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 16 Sep 2024 11:26:28 -0400 Subject: [PATCH 055/126] remove duplicate columns for projection --- presto-clp/src/main/java/com/yscope/presto/ClpClient.java | 6 ++++-- .../main/java/com/yscope/presto/ClpRecordSetProvider.java | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 1523b02c22693..71c0a0fffb4c7 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -295,8 +295,10 @@ private BufferedReader searchTable(String tableName, String archiveId, String qu argumentList.add("--archive-id"); argumentList.add(archiveId); argumentList.add(query); - argumentList.add("--projection"); - argumentList.addAll(columns); + if (!columns.isEmpty()) { + argumentList.add("--projection"); + argumentList.addAll(columns); + } log.info("Argument list: %s", argumentList.toString()); ProcessBuilder processBuilder = new ProcessBuilder(argumentList); Process process = processBuilder.start(); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java index 1e9bffb9863a8..84070d1387e9b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java @@ -20,6 +20,7 @@ import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import javax.inject.Inject; @@ -51,7 +52,10 @@ public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), clpSplit.getArchiveId(), clpSplit.getQuery(), - clpColumnHandles.stream().map(ClpColumnHandle::getOriginalColumnName).collect(ImmutableList.toImmutableList())), + clpColumnHandles.stream() + .map(ClpColumnHandle::getOriginalColumnName) + .collect(ImmutableSet.toImmutableSet()) + .asList()), clpClient.getConfig().isPolymorphicTypeEnabled(), clpColumnHandles); } } From 49aed85a60dcd4ca4fe5c2c08e94a7073814b1ce Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 17 Sep 2024 16:58:37 -0400 Subject: [PATCH 056/126] destroy clp-s process when closing the record 
cursor --- .../java/com/yscope/presto/ClpClient.java | 38 ++++++++----------- .../com/yscope/presto/ClpRecordCursor.java | 17 ++++++++- .../java/com/yscope/presto/ClpRecordSet.java | 8 ++-- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 71c0a0fffb4c7..c9a04bc50e7a7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -253,7 +253,7 @@ public Set listColumns(String tableName) return polymorphicColumnHandles; } - public BufferedReader getRecords(String tableName, String archiveId, Optional query, List columns) + public ProcessBuilder getRecords(String tableName, String archiveId, Optional query, List columns) { if (!listTables().contains(tableName)) { return null; @@ -284,30 +284,22 @@ public BufferedReader getRecords(String tableName, String archiveId, Optional columns) + private ProcessBuilder searchTable(String tableName, String archiveId, String query, List columns) { Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); - try { - List argumentList = new ArrayList<>(); - argumentList.add(executablePath.toString()); - argumentList.add("s"); - argumentList.add(tableArchiveDir.toString()); - argumentList.add("--archive-id"); - argumentList.add(archiveId); - argumentList.add(query); - if (!columns.isEmpty()) { - argumentList.add("--projection"); - argumentList.addAll(columns); - } - log.info("Argument list: %s", argumentList.toString()); - ProcessBuilder processBuilder = new ProcessBuilder(argumentList); - Process process = processBuilder.start(); - return new BufferedReader(new InputStreamReader(process.getInputStream())); - } - catch (IOException e) { - log.error(e, "Failed to search records for table %s", tableName); - return null; - } + List argumentList = new ArrayList<>(); + argumentList.add(executablePath.toString()); + 
argumentList.add("s"); + argumentList.add(tableArchiveDir.toString()); + argumentList.add("--archive-id"); + argumentList.add(archiveId); + argumentList.add(query); + if (!columns.isEmpty()) { + argumentList.add("--projection"); + argumentList.addAll(columns); + } + log.info("Argument list: %s", argumentList.toString()); + return new ProcessBuilder(argumentList); } private boolean decompressRecords(String tableName) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index ceee661c8921f..f0e75d45d2ac8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -13,6 +13,7 @@ */ package com.yscope.presto; +import com.facebook.airlift.log.Logger; import com.facebook.presto.common.block.BlockBuilder; import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.RecordCursor; @@ -22,6 +23,8 @@ import io.airlift.slice.Slices; import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -37,14 +40,23 @@ public class ClpRecordCursor implements RecordCursor { + private static final Logger log = Logger.get(ClpRecordCursor.class); private final BufferedReader reader; + private final Process process; private final boolean isPolymorphicTypeEnabled; private final List columnHandles; private final List fields; - public ClpRecordCursor(BufferedReader reader, boolean isPolymorphicTypeEnabled, List columnHandles) + public ClpRecordCursor(ProcessBuilder processBuilder, boolean isPolymorphicTypeEnabled, List columnHandles) { - this.reader = reader; + try { + this.process = processBuilder.start(); + } + catch (IOException e) { + log.error(e, "Failed to search records"); + throw new RuntimeException(e); + } + this.reader = new BufferedReader(new 
InputStreamReader(process.getInputStream())); this.isPolymorphicTypeEnabled = isPolymorphicTypeEnabled; this.columnHandles = columnHandles; this.fields = new ArrayList<>(columnHandles.size()); @@ -149,6 +161,7 @@ public boolean isNull(int field) @Override public void close() { + process.destroy(); } private void parseLine(JsonNode node, String prefix) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java index 990cfcc61dac2..191298e1d96c8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java @@ -26,13 +26,13 @@ public class ClpRecordSet implements RecordSet { - private final BufferedReader reader; + private final ProcessBuilder processBuilder; private final List columnHandles; private final boolean isPolymorphicTypeEnabled; - public ClpRecordSet(BufferedReader reader, boolean isPolymorphicTypeEnabled, List columnHandles) + public ClpRecordSet(ProcessBuilder processBuilder, boolean isPolymorphicTypeEnabled, List columnHandles) { - this.reader = requireNonNull(reader, "reader is null"); + this.processBuilder = requireNonNull(processBuilder, "process builder is null"); this.isPolymorphicTypeEnabled = isPolymorphicTypeEnabled; this.columnHandles = requireNonNull(columnHandles, "column handles is null"); } @@ -46,6 +46,6 @@ public List getColumnTypes() @Override public RecordCursor cursor() { - return new ClpRecordCursor(reader, isPolymorphicTypeEnabled, columnHandles); + return new ClpRecordCursor(processBuilder, isPolymorphicTypeEnabled, columnHandles); } } From 6bdec9d3526b5fd7ddb0be40deea3587d42fe98e Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 19 Sep 2024 15:41:29 -0400 Subject: [PATCH 057/126] remove unused import --- presto-clp/src/main/java/com/yscope/presto/ClpClient.java | 2 -- presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java | 1 - 2 files changed, 3 deletions(-) diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index c9a04bc50e7a7..a0c0be5aa0b7c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -30,11 +30,9 @@ import javax.annotation.PreDestroy; import javax.inject.Inject; -import java.io.BufferedReader; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java index 191298e1d96c8..f561d0b0d2c2e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java @@ -18,7 +18,6 @@ import com.facebook.presto.spi.RecordSet; import com.google.common.collect.ImmutableList; -import java.io.BufferedReader; import java.util.List; import static java.util.Objects.requireNonNull; From 98136139be9fce3a96e20c415270f6470737c121 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 30 Sep 2024 01:15:51 +0000 Subject: [PATCH 058/126] add concurrent split support --- .../presto_cpp/main/types/PrestoToVeloxConnector.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index 8854701c6a606..811ef98e5549c 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -1564,7 +1564,10 @@ ClpPrestoToVeloxConnector::toVeloxSplit( VELOX_CHECK_NOT_NULL( clpSplit, "Unexpected split type {}", connectorSplit->_type); return std::make_unique( - catalogId, 
clpSplit->schemaName, clpSplit->tableName); + catalogId, + clpSplit->schemaName, + clpSplit->tableName, + clpSplit->archiveId); } std::unique_ptr From 79ac2811167d9736c56153feaf6dc201dcb61c49 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 27 Sep 2024 21:28:00 -0400 Subject: [PATCH 059/126] close the buffer when closing teh CLP Record curor --- .../src/main/java/com/yscope/presto/ClpRecordCursor.java | 6 ++++++ presto_query.py | 0 2 files changed, 6 insertions(+) create mode 100644 presto_query.py diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java index f0e75d45d2ac8..aa5d2fa38fd77 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java @@ -161,6 +161,12 @@ public boolean isNull(int field) @Override public void close() { + try { + reader.close(); + } + catch (IOException e) { + log.warn(e, "Failed to close reader"); + } process.destroy(); } diff --git a/presto_query.py b/presto_query.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 6d059cf0b5a77f97a3facc7dfe7366ff1645d552 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 30 Sep 2024 01:25:37 +0000 Subject: [PATCH 060/126] remove unused file --- presto_query.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 presto_query.py diff --git a/presto_query.py b/presto_query.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 440e7a0ba67c7a35eb42f7453983f39a11ed68b6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 2 Oct 2024 21:50:25 -0400 Subject: [PATCH 061/126] resolve several issues --- .../dependency-reduced-pom.xml | 4 ++-- presto-cli/dependency-reduced-pom.xml | 2 +- presto-clp/pom.xml | 2 +- .../dependency-reduced-pom.xml | 2 +- presto-server/src/main/provisio/presto.xml | 6 ++++++ .../dependency-reduced-pom.xml | 2 +- presto-verifier/dependency-reduced-pom.xml | 18 
+++++++++--------- 7 files changed, 21 insertions(+), 15 deletions(-) diff --git a/presto-benchmark-driver/dependency-reduced-pom.xml b/presto-benchmark-driver/dependency-reduced-pom.xml index 1ab11a3a4ff48..7b41f582da5dd 100644 --- a/presto-benchmark-driver/dependency-reduced-pom.xml +++ b/presto-benchmark-driver/dependency-reduced-pom.xml @@ -3,7 +3,7 @@ presto-root com.facebook.presto - 0.289-SNAPSHOT + 0.290-SNAPSHOT 4.0.0 presto-benchmark-driver @@ -54,7 +54,7 @@ com.facebook.presto presto-testng-services - 0.289-SNAPSHOT + 0.290-SNAPSHOT test diff --git a/presto-cli/dependency-reduced-pom.xml b/presto-cli/dependency-reduced-pom.xml index bfdb7afcff90d..e7fc4f4cd553e 100644 --- a/presto-cli/dependency-reduced-pom.xml +++ b/presto-cli/dependency-reduced-pom.xml @@ -3,7 +3,7 @@ presto-root com.facebook.presto - 0.289-SNAPSHOT + 0.290-SNAPSHOT 4.0.0 presto-cli diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 2174a44ba13a5..942b258de09b4 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -6,7 +6,7 @@ com.facebook.presto presto-root - 0.289-SNAPSHOT + 0.290-SNAPSHOT com.yscope.presto diff --git a/presto-product-tests/dependency-reduced-pom.xml b/presto-product-tests/dependency-reduced-pom.xml index 26cdb4dc7cb46..d01741629acfe 100644 --- a/presto-product-tests/dependency-reduced-pom.xml +++ b/presto-product-tests/dependency-reduced-pom.xml @@ -3,7 +3,7 @@ presto-root com.facebook.presto - 0.289-SNAPSHOT + 0.290-SNAPSHOT 4.0.0 presto-product-tests diff --git a/presto-server/src/main/provisio/presto.xml b/presto-server/src/main/provisio/presto.xml index 4e82ac23b9a1f..06f7caf37d093 100644 --- a/presto-server/src/main/provisio/presto.xml +++ b/presto-server/src/main/provisio/presto.xml @@ -230,6 +230,12 @@ + + + + + + diff --git a/presto-testing-server-launcher/dependency-reduced-pom.xml b/presto-testing-server-launcher/dependency-reduced-pom.xml index 79f8bf68bda4b..7a36a589bee01 100644 --- 
a/presto-testing-server-launcher/dependency-reduced-pom.xml +++ b/presto-testing-server-launcher/dependency-reduced-pom.xml @@ -3,7 +3,7 @@ presto-root com.facebook.presto - 0.289-SNAPSHOT + 0.290-SNAPSHOT 4.0.0 presto-testing-server-launcher diff --git a/presto-verifier/dependency-reduced-pom.xml b/presto-verifier/dependency-reduced-pom.xml index 0e41ca9358ac3..cda43d6a5266f 100644 --- a/presto-verifier/dependency-reduced-pom.xml +++ b/presto-verifier/dependency-reduced-pom.xml @@ -3,7 +3,7 @@ presto-root com.facebook.presto - 0.289-SNAPSHOT + 0.290-SNAPSHOT 4.0.0 presto-verifier @@ -85,20 +85,20 @@ com.facebook.presto presto-main - 0.289-SNAPSHOT + 0.290-SNAPSHOT test-jar test com.facebook.presto presto-memory - 0.289-SNAPSHOT + 0.290-SNAPSHOT test com.facebook.presto presto-tests - 0.289-SNAPSHOT + 0.290-SNAPSHOT test @@ -122,7 +122,7 @@ com.facebook.presto presto-tpch - 0.289-SNAPSHOT + 0.290-SNAPSHOT test @@ -134,7 +134,7 @@ com.facebook.airlift testing - 0.209 + 0.215 test @@ -188,13 +188,13 @@ com.facebook.presto presto-testng-services - 0.289-SNAPSHOT + 0.290-SNAPSHOT test com.facebook.airlift launcher - 0.209 + 0.215 tar.gz bin provided @@ -202,7 +202,7 @@ com.facebook.airlift launcher - 0.209 + 0.215 tar.gz properties provided From c089cdb5a998dabaa643e8a2c21352799a961371 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 2 Jan 2025 09:28:08 -0500 Subject: [PATCH 062/126] add support for reading metadata from mysql --- presto-clp/pom.xml | 5 + .../java/com/yscope/presto/ClpClient.java | 171 ++++++++++++------ .../java/com/yscope/presto/ClpConfig.java | 78 ++++++++ 3 files changed, 194 insertions(+), 60 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 942b258de09b4..375988c713b2c 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -20,6 +20,11 @@ + + mysql + mysql-connector-java + + com.facebook.airlift bootstrap diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index a0c0be5aa0b7c..9d4796a4d5a35 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -21,6 +21,9 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.github.luben.zstd.ZstdInputStream; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.yscope.presto.schema.SchemaNode; @@ -43,6 +46,11 @@ import java.nio.file.Paths; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashSet; @@ -52,25 +60,35 @@ import java.util.Set; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.SECONDS; public class ClpClient { private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; private final Path executablePath; - private final Map> tableNameToColumnHandles; + private final LoadingCache> columnHandleCache; + private final LoadingCache> tableNameCache; private final Map> tableNameToArchiveIds; private final Path decompressDir; - private Set tableNames; @Inject public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); - this.tableNameToColumnHandles = new HashMap<>(); this.tableNameToArchiveIds = new HashMap<>(); this.executablePath = getExecutablePath(); this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); + + this.columnHandleCache = CacheBuilder.newBuilder() + 
.expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) + .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) + .build(CacheLoader.from(this::loadTableSchema)); + + this.tableNameCache = CacheBuilder.newBuilder() + .expireAfterWrite(1, SECONDS) // TODO: Configure + .refreshAfterWrite(1, SECONDS) + .build(CacheLoader.from(this::loadTable)); } @PostConstruct @@ -154,34 +172,103 @@ private Path getExecutablePathFromEnvironment() return executablePath; } - public Set listTables() + public Set loadTableSchema(String tableName) { - if (tableNames != null) { - return tableNames; + Connection connection = null; + LinkedHashSet columnHandles = new LinkedHashSet<>(); + try { + connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + Statement statement = connection.createStatement(); + + String query = "SELECT * FROM" + config.getMetadataTablePrefix() + tableName; + ResultSet resultSet = statement.executeQuery(query); + + while (resultSet.next()) { + String columnName = resultSet.getString("name"); + SchemaNode.NodeType columnType = SchemaNode.NodeType.fromType(resultSet.getByte("type")); + Type prestoType = null; + switch (columnType) { + case Integer: + prestoType = BigintType.BIGINT; + break; + case Float: + prestoType = DoubleType.DOUBLE; + break; + case ClpString: + case VarString: + case DateString: + case NullValue: + prestoType = VarcharType.VARCHAR; + break; + case UnstructuredArray: + prestoType = new ArrayType(VarcharType.VARCHAR); + break; + case Boolean: + prestoType = BooleanType.BOOLEAN; + break; + default: + break; + } + columnHandles.add(new ClpColumnHandle(columnName, prestoType, true)); + } } - if (config.getClpArchiveDir() == null || config.getClpArchiveDir().isEmpty()) { - tableNames = ImmutableSet.of(); - return tableNames; + catch (SQLException e) { + log.error(e, "Failed to connect to metadata database"); + return ImmutableSet.of(); } - Path archiveDir = 
Paths.get(config.getClpArchiveDir()); - if (!Files.exists(archiveDir) || !Files.isDirectory(archiveDir)) { - tableNames = ImmutableSet.of(); - return tableNames; + finally { + try { + if (connection != null) { + connection.close(); + } + } + catch (SQLException ex) { + log.warn(ex, "Failed to close metadata database connection"); + } } + if (!config.isPolymorphicTypeEnabled()) { + return columnHandles; + } + return handlePolymorphicType(columnHandles); + } - try (DirectoryStream stream = Files.newDirectoryStream(archiveDir)) { - ImmutableSet.Builder tableNames = ImmutableSet.builder(); - for (Path path : stream) { - if (Files.isDirectory(path)) { - tableNames.add(path.getFileName().toString()); - } + public Set loadTable(String tableName) + { + ImmutableSet.Builder tableNames = ImmutableSet.builder(); + Connection connection = null; + try { + connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + Statement statement = connection.createStatement(); + + String query = "SHOW TABLES"; + ResultSet resultSet = statement.executeQuery(query); + + // Processing the results + String databaseName = config.getMetadataDbUrl().substring(config.getMetadataDbUrl().lastIndexOf('/') + 1); + while (resultSet.next()) { + tableNames.add(resultSet.getString("Tables_in_" + databaseName).substring(config.getMetadataTablePrefix().length())); } - this.tableNames = tableNames.build(); } - catch (Exception e) { - this.tableNames = ImmutableSet.of(); + catch (SQLException e) { + log.error(e, "Failed to connect to metadata database"); + } + finally { + // Closing the connection + try { + if (connection != null) { + connection.close(); + } + } + catch (SQLException ex) { + log.warn(ex, "Failed to close metadata database connection"); + } } - return this.tableNames; + return tableNames.build(); + } + + public Set listTables() + { + return tableNameCache.getUnchecked("default"); } public List listArchiveIds(String 
tableName) @@ -212,43 +299,7 @@ public List listArchiveIds(String tableName) public Set listColumns(String tableName) { - if (tableNameToColumnHandles.containsKey(tableName)) { - return tableNameToColumnHandles.get(tableName); - } - - Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); - LinkedHashSet columnHandles = new LinkedHashSet<>(); - if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { - return ImmutableSet.of(); - } - - try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { - for (Path path : stream) { - if (Files.isRegularFile(path)) { - continue; - } - - // For each directory, get schema_maps file under it - Path schemaMapsFile = path.resolve("schema_tree"); - if (!Files.exists(schemaMapsFile) || !Files.isRegularFile(schemaMapsFile)) { - continue; - } - - columnHandles.addAll(parseSchemaTreeFile(schemaMapsFile)); - } - } - catch (Exception e) { - tableNameToColumnHandles.put(tableName, ImmutableSet.of()); - return ImmutableSet.of(); - } - - if (!config.isPolymorphicTypeEnabled()) { - tableNameToColumnHandles.put(tableName, columnHandles); - return columnHandles; - } - Set polymorphicColumnHandles = handlePolymorphicType(columnHandles); - tableNameToColumnHandles.put(tableName, polymorphicColumnHandles); - return polymorphicColumnHandles; + return columnHandleCache.getUnchecked(tableName); } public ProcessBuilder getRecords(String tableName, String archiveId, Optional query, List columns) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index dfd39128d50bf..b354dab9ba137 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -20,6 +20,12 @@ public class ClpConfig private String clpExecutablePath; private String clpArchiveDir; private boolean polymorphicTypeEnabled; + private String metadataDbUrl; + private String metadataDbUser; + private String 
metadataDbPassword; + private String metadataTablePrefix; + private long metadataRefreshInterval; + private long metadataExpireInterval; public String getClpExecutablePath() { @@ -56,4 +62,76 @@ public ClpConfig setPolymorphicTypeEnabled(boolean polymorphicTypeEnabled) this.polymorphicTypeEnabled = polymorphicTypeEnabled; return this; } + + public String getMetadataDbUrl() + { + return metadataDbUrl; + } + + @Config("metadata-db-url") + public ClpConfig setMetadataDbUrl(String metadataDbUrl) + { + this.metadataDbUrl = metadataDbUrl; + return this; + } + + public String getMetadataDbUser() + { + return metadataDbUser; + } + + @Config("metadata-db-user") + public ClpConfig setMetadataDbUser(String metadataDbUser) + { + this.metadataDbUser = metadataDbUser; + return this; + } + + public String getMetadataDbPassword() + { + return metadataDbPassword; + } + + @Config("metadata-db-password") + public ClpConfig setMetadataDbPassword(String metadataDbPassword) + { + this.metadataDbPassword = metadataDbPassword; + return this; + } + + public String getMetadataTablePrefix() + { + return metadataTablePrefix; + } + + @Config("metadata-table-prefix") + public ClpConfig setMetadataTablePrefix(String metadataTablePrefix) + { + this.metadataTablePrefix = metadataTablePrefix; + return this; + } + + public long getMetadataRefreshInterval() + { + return metadataRefreshInterval; + } + + @Config("metadata-refresh-interval") + public ClpConfig setMetadataRefreshInterval(long metadataRefreshInterval) + { + this.metadataRefreshInterval = metadataRefreshInterval; + return this; + } + + public long getMetadataExpireInterval() + { + return metadataExpireInterval; + } + + @Config("metadata-expire-interval") + public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) + { + this.metadataExpireInterval = metadataExpireInterval; + return this; + } } From dddc29b4bdb5e38dc53f819b9bad7d15668f1649 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 20 Jan 2025 15:19:04 -0500 Subject: 
[PATCH 063/126] add support for searching from S3 and reading schema from MySQL --- .../java/com/yscope/presto/ClpClient.java | 127 +++++++++++------- .../java/com/yscope/presto/ClpConfig.java | 111 ++++++++++----- 2 files changed, 154 insertions(+), 84 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 9d4796a4d5a35..8c1661311ec36 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -64,30 +64,37 @@ public class ClpClient { + public static final String TableMetadataPrefix = "table_metadata_"; + public static final String ArchiveTableSuffix = "archives"; private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; + private final ClpConfig.FileSource fileSource; private final Path executablePath; + private final Path decompressDir; private final LoadingCache> columnHandleCache; private final LoadingCache> tableNameCache; - private final Map> tableNameToArchiveIds; - private final Path decompressDir; @Inject public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); - this.tableNameToArchiveIds = new HashMap<>(); - this.executablePath = getExecutablePath(); - this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); - + this.fileSource = config.getFileSource(); + if (fileSource == ClpConfig.FileSource.LOCAL) { + this.executablePath = getExecutablePath(); + this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); + } + else { + this.executablePath = null; + this.decompressDir = null; + } this.columnHandleCache = CacheBuilder.newBuilder() .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) .build(CacheLoader.from(this::loadTableSchema)); this.tableNameCache = CacheBuilder.newBuilder() - 
.expireAfterWrite(1, SECONDS) // TODO: Configure - .refreshAfterWrite(1, SECONDS) + .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) // TODO: Configure + .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) .build(CacheLoader.from(this::loadTable)); } @@ -180,7 +187,7 @@ public Set loadTableSchema(String tableName) connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); - String query = "SELECT * FROM" + config.getMetadataTablePrefix() + tableName; + String query = "SELECT * FROM" + config.getMetadataTablePrefix() + TableMetadataPrefix + tableName; ResultSet resultSet = statement.executeQuery(query); while (resultSet.next()) { @@ -232,7 +239,7 @@ public Set loadTableSchema(String tableName) return handlePolymorphicType(columnHandles); } - public Set loadTable(String tableName) + public Set loadTable(String schemaName) { ImmutableSet.Builder tableNames = ImmutableSet.builder(); Connection connection = null; @@ -245,8 +252,12 @@ public Set loadTable(String tableName) // Processing the results String databaseName = config.getMetadataDbUrl().substring(config.getMetadataDbUrl().lastIndexOf('/') + 1); + String tableNamePrefix = config.getMetadataTablePrefix() + TableMetadataPrefix; while (resultSet.next()) { - tableNames.add(resultSet.getString("Tables_in_" + databaseName).substring(config.getMetadataTablePrefix().length())); + String tableName = resultSet.getString("Tables_in_" + databaseName); + if (tableName.startsWith(config.getMetadataTablePrefix()) && tableName.length() > tableNamePrefix.length()) { + tableNames.add(tableName.substring(tableNamePrefix.length())); + } } } catch (SQLException e) { @@ -273,27 +284,56 @@ public Set listTables() public List listArchiveIds(String tableName) { - if (tableNameToArchiveIds.containsKey(tableName)) { - return tableNameToArchiveIds.get(tableName); - } - Path tableDir = 
Paths.get(config.getClpArchiveDir(), tableName); - if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { - return ImmutableList.of(); - } + if (fileSource == ClpConfig.FileSource.LOCAL) { + Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); + if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { + return ImmutableList.of(); + } - try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { - ImmutableList.Builder archiveIds = ImmutableList.builder(); - for (Path path : stream) { - if (Files.isDirectory(path)) { - archiveIds.add(path.getFileName().toString()); + try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { + ImmutableList.Builder archiveIds = ImmutableList.builder(); + for (Path path : stream) { + if (Files.isDirectory(path)) { + archiveIds.add(path.getFileName().toString()); + } } + return archiveIds.build(); + } + catch (Exception e) { + return ImmutableList.of(); } - List archiveIdsList = archiveIds.build(); - tableNameToArchiveIds.put(tableName, archiveIdsList); - return archiveIdsList; } - catch (Exception e) { - return ImmutableList.of(); + else { + String bucketName = config.getS3Bucket(); + Connection connection = null; + try { + connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + Statement statement = connection.createStatement(); + + String query = "SELECT id FROM " + config.getMetadataTablePrefix() + ArchiveTableSuffix; + ResultSet resultSet = statement.executeQuery(query); + + ImmutableList.Builder archiveIds = ImmutableList.builder(); + while (resultSet.next()) { + archiveIds.add(resultSet.getString("id")); + } + return archiveIds.build(); + } + catch (SQLException e) { + log.error(e, "Failed to connect to metadata database"); + return ImmutableList.of(); + } + finally { + // Closing the connection + try { + if (connection != null) { + connection.close(); + } + } + catch (SQLException ex) { + log.warn(ex, "Failed 
to close metadata database connection"); + } + } } } @@ -308,33 +348,15 @@ public ProcessBuilder getRecords(String tableName, String archiveId, Optional searchTable(tableName, archiveId, s, columns)) + .orElseGet(() -> searchTable(tableName, archiveId, "*", columns)); } private ProcessBuilder searchTable(String tableName, String archiveId, String query, List columns) { + if (fileSource == ClpConfig.FileSource.S3) { + throw new RuntimeException("Cannot handle S3 source"); + } Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); List argumentList = new ArrayList<>(); argumentList.add(executablePath.toString()); @@ -353,6 +375,9 @@ private ProcessBuilder searchTable(String tableName, String archiveId, String qu private boolean decompressRecords(String tableName) { + if (fileSource == ClpConfig.FileSource.S3) { + throw new RuntimeException("Cannot handle S3 source"); + } Path tableDecompressDir = decompressDir.resolve(tableName); Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index b354dab9ba137..63a9bacfbda6c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -17,8 +17,12 @@ public class ClpConfig { - private String clpExecutablePath; - private String clpArchiveDir; + public enum FileSource + { + LOCAL, + S3 + } + private boolean polymorphicTypeEnabled; private String metadataDbUrl; private String metadataDbUser; @@ -26,37 +30,18 @@ public class ClpConfig private String metadataTablePrefix; private long metadataRefreshInterval; private long metadataExpireInterval; - - public String getClpExecutablePath() - { - return clpExecutablePath; - } - - @Config("executable-path") - public ClpConfig setClpExecutablePath(String clpExecutablePath) - { - this.clpExecutablePath = clpExecutablePath; - return this; - } - - 
public String getClpArchiveDir() - { - return clpArchiveDir; - } - - @Config("archive-dir") - public ClpConfig setClpArchiveDir(String clpArchiveDir) - { - this.clpArchiveDir = clpArchiveDir; - return this; - } + private FileSource fileSource = FileSource.LOCAL; + private String clpExecutablePath; + private String clpArchiveDir; + private String s3Bucket; + private String s3KeyPrefix; public boolean isPolymorphicTypeEnabled() { return polymorphicTypeEnabled; } - @Config("polymorphic-type-enabled") + @Config("clp.polymorphic-type-enabled") public ClpConfig setPolymorphicTypeEnabled(boolean polymorphicTypeEnabled) { this.polymorphicTypeEnabled = polymorphicTypeEnabled; @@ -68,7 +53,7 @@ public String getMetadataDbUrl() return metadataDbUrl; } - @Config("metadata-db-url") + @Config("clp.metadata-db-url") public ClpConfig setMetadataDbUrl(String metadataDbUrl) { this.metadataDbUrl = metadataDbUrl; @@ -80,7 +65,7 @@ public String getMetadataDbUser() return metadataDbUser; } - @Config("metadata-db-user") + @Config("clp.metadata-db-user") public ClpConfig setMetadataDbUser(String metadataDbUser) { this.metadataDbUser = metadataDbUser; @@ -92,7 +77,7 @@ public String getMetadataDbPassword() return metadataDbPassword; } - @Config("metadata-db-password") + @Config("clp.metadata-db-password") public ClpConfig setMetadataDbPassword(String metadataDbPassword) { this.metadataDbPassword = metadataDbPassword; @@ -104,7 +89,7 @@ public String getMetadataTablePrefix() return metadataTablePrefix; } - @Config("metadata-table-prefix") + @Config("clp.metadata-table-prefix") public ClpConfig setMetadataTablePrefix(String metadataTablePrefix) { this.metadataTablePrefix = metadataTablePrefix; @@ -116,7 +101,7 @@ public long getMetadataRefreshInterval() return metadataRefreshInterval; } - @Config("metadata-refresh-interval") + @Config("clp.metadata-refresh-interval") public ClpConfig setMetadataRefreshInterval(long metadataRefreshInterval) { this.metadataRefreshInterval = 
metadataRefreshInterval; @@ -128,10 +113,70 @@ public long getMetadataExpireInterval() return metadataExpireInterval; } - @Config("metadata-expire-interval") + @Config("clp.metadata-expire-interval") public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) { this.metadataExpireInterval = metadataExpireInterval; return this; } + + public FileSource getFileSource() + { + return fileSource; + } + + @Config("clp.file-source") + public ClpConfig setFileSource(FileSource fileSource) + { + this.fileSource = fileSource; + return this; + } + + public String getClpExecutablePath() + { + return clpExecutablePath; + } + + @Config("clp.executable-path") + public ClpConfig setClpExecutablePath(String clpExecutablePath) + { + this.clpExecutablePath = clpExecutablePath; + return this; + } + + public String getClpArchiveDir() + { + return clpArchiveDir; + } + + @Config("clp.archive-dir") + public ClpConfig setClpArchiveDir(String clpArchiveDir) + { + this.clpArchiveDir = clpArchiveDir; + return this; + } + + public String getS3Bucket() + { + return s3Bucket; + } + + @Config("clp.s3-bucket") + public ClpConfig setS3Bucket(String s3Bucket) + { + this.s3Bucket = s3Bucket; + return this; + } + + public String getS3KeyPrefix() + { + return s3KeyPrefix; + } + + @Config("clp.s3-key-prefix") + public ClpConfig setS3KeyPrefix(String s3KeyPrefix) + { + this.s3KeyPrefix = s3KeyPrefix; + return this; + } } From a81dabb9f1b8c02f86cc570b37824d658e214972 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 21 Jan 2025 22:08:58 -0500 Subject: [PATCH 064/126] rename tableMetadataPrefix --- .../src/main/java/com/yscope/presto/ClpClient.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 8c1661311ec36..1cb4e23d8102e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ 
b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -64,8 +64,8 @@ public class ClpClient { - public static final String TableMetadataPrefix = "table_metadata_"; - public static final String ArchiveTableSuffix = "archives"; + public static final String columnMetadataPrefix = "column_metadata_"; + public static final String archiveTableSuffix = "archives"; private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; private final ClpConfig.FileSource fileSource; @@ -187,7 +187,7 @@ public Set loadTableSchema(String tableName) connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); - String query = "SELECT * FROM" + config.getMetadataTablePrefix() + TableMetadataPrefix + tableName; + String query = "SELECT * FROM" + config.getMetadataTablePrefix() + columnMetadataPrefix + tableName; ResultSet resultSet = statement.executeQuery(query); while (resultSet.next()) { @@ -252,7 +252,7 @@ public Set loadTable(String schemaName) // Processing the results String databaseName = config.getMetadataDbUrl().substring(config.getMetadataDbUrl().lastIndexOf('/') + 1); - String tableNamePrefix = config.getMetadataTablePrefix() + TableMetadataPrefix; + String tableNamePrefix = config.getMetadataTablePrefix() + columnMetadataPrefix; while (resultSet.next()) { String tableName = resultSet.getString("Tables_in_" + databaseName); if (tableName.startsWith(config.getMetadataTablePrefix()) && tableName.length() > tableNamePrefix.length()) { @@ -310,7 +310,7 @@ public List listArchiveIds(String tableName) connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); - String query = "SELECT id FROM " + config.getMetadataTablePrefix() + ArchiveTableSuffix; + String query = "SELECT id FROM " + 
config.getMetadataTablePrefix() + archiveTableSuffix; ResultSet resultSet = statement.executeQuery(query); ImmutableList.Builder archiveIds = ImmutableList.builder(); From e4c758490358d4281bad002630e220bc81402700 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 22 Jan 2025 09:29:50 -0500 Subject: [PATCH 065/126] minor fixes --- presto-clp/pom.xml | 1 + .../java/com/yscope/presto/ClpClient.java | 10 +++-- .../java/com/yscope/presto/ClpConfig.java | 38 ++++++++++++++++--- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 375988c713b2c..e548a1429409a 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -23,6 +23,7 @@ mysql mysql-connector-java + runtime diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 1cb4e23d8102e..5b65b2477d3fe 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -68,6 +68,7 @@ public class ClpClient public static final String archiveTableSuffix = "archives"; private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; + private final String metadataDbUrl; private final ClpConfig.FileSource fileSource; private final Path executablePath; private final Path decompressDir; @@ -78,6 +79,7 @@ public class ClpClient public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); + this.metadataDbUrl = "jdbc:mysql://" + config.getMetadataDbHost() + ":" + config.getMetadataDbPort() + "/" + config.getMetadataDbName(); this.fileSource = config.getFileSource(); if (fileSource == ClpConfig.FileSource.LOCAL) { this.executablePath = getExecutablePath(); @@ -184,7 +186,7 @@ public Set loadTableSchema(String tableName) Connection connection = null; LinkedHashSet columnHandles = new LinkedHashSet<>(); try { - connection = 
DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); String query = "SELECT * FROM" + config.getMetadataTablePrefix() + columnMetadataPrefix + tableName; @@ -244,14 +246,14 @@ public Set loadTable(String schemaName) ImmutableSet.Builder tableNames = ImmutableSet.builder(); Connection connection = null; try { - connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); String query = "SHOW TABLES"; ResultSet resultSet = statement.executeQuery(query); // Processing the results - String databaseName = config.getMetadataDbUrl().substring(config.getMetadataDbUrl().lastIndexOf('/') + 1); + String databaseName = config.getMetadataDbName(); String tableNamePrefix = config.getMetadataTablePrefix() + columnMetadataPrefix; while (resultSet.next()) { String tableName = resultSet.getString("Tables_in_" + databaseName); @@ -307,7 +309,7 @@ public List listArchiveIds(String tableName) String bucketName = config.getS3Bucket(); Connection connection = null; try { - connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); String query = "SELECT id FROM " + config.getMetadataTablePrefix() + archiveTableSuffix; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 63a9bacfbda6c..f122a90904a1c 100644 
--- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -24,7 +24,9 @@ public enum FileSource } private boolean polymorphicTypeEnabled; - private String metadataDbUrl; + private String metadataDbHost; + private String metadataDbPort; + private String metadataDbName; private String metadataDbUser; private String metadataDbPassword; private String metadataTablePrefix; @@ -48,15 +50,39 @@ public ClpConfig setPolymorphicTypeEnabled(boolean polymorphicTypeEnabled) return this; } - public String getMetadataDbUrl() + public String getMetadataDbHost() { - return metadataDbUrl; + return metadataDbHost; } - @Config("clp.metadata-db-url") - public ClpConfig setMetadataDbUrl(String metadataDbUrl) + @Config("clp.metadata-db-host") + public ClpConfig setMetadataDbHost(String metadataDbHost) { - this.metadataDbUrl = metadataDbUrl; + this.metadataDbHost = metadataDbHost; + return this; + } + + public String getMetadataDbPort() + { + return metadataDbPort; + } + + @Config("clp.metadata-db-port") + public ClpConfig setMetadataDbPort(String metadataDbPort) + { + this.metadataDbPort = metadataDbPort; + return this; + } + + public String getMetadataDbName() + { + return metadataDbName; + } + + @Config("clp.metadata-db-name") + public ClpConfig setMetadataDbName(String metadataDbName) + { + this.metadataDbName = metadataDbName; return this; } From c02b71fd77f8d9de0c40e6e151d65517a7edba27 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 22 Jan 2025 16:21:55 -0500 Subject: [PATCH 066/126] rename file source to input source --- .../src/main/java/com/yscope/presto/ClpClient.java | 12 ++++++------ .../src/main/java/com/yscope/presto/ClpConfig.java | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 5b65b2477d3fe..6e1e6e92a893a 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -69,7 +69,7 @@ public class ClpClient private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; private final String metadataDbUrl; - private final ClpConfig.FileSource fileSource; + private final ClpConfig.InputSource inputSource; private final Path executablePath; private final Path decompressDir; private final LoadingCache> columnHandleCache; @@ -80,8 +80,8 @@ public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); this.metadataDbUrl = "jdbc:mysql://" + config.getMetadataDbHost() + ":" + config.getMetadataDbPort() + "/" + config.getMetadataDbName(); - this.fileSource = config.getFileSource(); - if (fileSource == ClpConfig.FileSource.LOCAL) { + this.inputSource = config.getInputSource(); + if (inputSource == ClpConfig.InputSource.LOCAL) { this.executablePath = getExecutablePath(); this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); } @@ -286,7 +286,7 @@ public Set listTables() public List listArchiveIds(String tableName) { - if (fileSource == ClpConfig.FileSource.LOCAL) { + if (inputSource == ClpConfig.InputSource.LOCAL) { Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { return ImmutableList.of(); @@ -356,7 +356,7 @@ public ProcessBuilder getRecords(String tableName, String archiveId, Optional columns) { - if (fileSource == ClpConfig.FileSource.S3) { + if (inputSource == ClpConfig.InputSource.S3) { throw new RuntimeException("Cannot handle S3 source"); } Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); @@ -377,7 +377,7 @@ private ProcessBuilder searchTable(String tableName, String archiveId, String qu private boolean decompressRecords(String tableName) { - if (fileSource == ClpConfig.FileSource.S3) { + if (inputSource == 
ClpConfig.InputSource.S3) { throw new RuntimeException("Cannot handle S3 source"); } Path tableDecompressDir = decompressDir.resolve(tableName); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index f122a90904a1c..205b84b9c038a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -17,7 +17,7 @@ public class ClpConfig { - public enum FileSource + public enum InputSource { LOCAL, S3 @@ -32,7 +32,7 @@ public enum FileSource private String metadataTablePrefix; private long metadataRefreshInterval; private long metadataExpireInterval; - private FileSource fileSource = FileSource.LOCAL; + private InputSource inputSource = InputSource.LOCAL; private String clpExecutablePath; private String clpArchiveDir; private String s3Bucket; @@ -146,15 +146,15 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) return this; } - public FileSource getFileSource() + public InputSource getInputSource() { - return fileSource; + return inputSource; } @Config("clp.file-source") - public ClpConfig setFileSource(FileSource fileSource) + public ClpConfig setInputSource(InputSource inputSource) { - this.fileSource = fileSource; + this.inputSource = inputSource; return this; } From 1c199da5aea523b2530133a0e46b7101fdf32bf4 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 22 Jan 2025 17:02:17 -0500 Subject: [PATCH 067/126] minor fix --- presto-clp/src/main/java/com/yscope/presto/ClpClient.java | 2 +- presto-clp/src/main/java/com/yscope/presto/ClpConfig.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 6e1e6e92a893a..06c16d642e7be 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ 
b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -95,7 +95,7 @@ public ClpClient(ClpConfig config) .build(CacheLoader.from(this::loadTableSchema)); this.tableNameCache = CacheBuilder.newBuilder() - .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) // TODO: Configure + .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) .build(CacheLoader.from(this::loadTable)); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 205b84b9c038a..55b5fd07816d8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -23,15 +23,15 @@ public enum InputSource S3 } - private boolean polymorphicTypeEnabled; + private boolean polymorphicTypeEnabled = true; private String metadataDbHost; private String metadataDbPort; private String metadataDbName; private String metadataDbUser; private String metadataDbPassword; private String metadataTablePrefix; - private long metadataRefreshInterval; - private long metadataExpireInterval; + private long metadataRefreshInterval = 60; + private long metadataExpireInterval = 600; private InputSource inputSource = InputSource.LOCAL; private String clpExecutablePath; private String clpArchiveDir; @@ -151,7 +151,7 @@ public InputSource getInputSource() return inputSource; } - @Config("clp.file-source") + @Config("clp.input-source") public ClpConfig setInputSource(InputSource inputSource) { this.inputSource = inputSource; From f7cd4d76679335f747aee2d8ec76a33bd7c297a4 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 24 Jan 2025 11:07:34 -0500 Subject: [PATCH 068/126] fix mysql runtime error --- .../java/com/yscope/presto/ClpClient.java | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 06c16d642e7be..3b96f030d23b8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -80,6 +80,12 @@ public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); this.metadataDbUrl = "jdbc:mysql://" + config.getMetadataDbHost() + ":" + config.getMetadataDbPort() + "/" + config.getMetadataDbName(); + try { + Class.forName("com.mysql.jdbc.Driver"); + } + catch (ClassNotFoundException e) { + log.error(e, "Failed to load MySQL JDBC driver"); + } this.inputSource = config.getInputSource(); if (inputSource == ClpConfig.InputSource.LOCAL) { this.executablePath = getExecutablePath(); @@ -104,7 +110,9 @@ public ClpClient(ClpConfig config) public void start() { try { - Files.createDirectories(decompressDir); + if (inputSource == ClpConfig.InputSource.LOCAL) { + Files.createDirectories(decompressDir); + } } catch (IOException e) { log.error(e, "Failed to create decompression directory"); @@ -115,27 +123,29 @@ public void start() public void close() { try { - Files.walkFileTree(decompressDir, new SimpleFileVisitor() - { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException - { - Files.delete(file); - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException + if (inputSource == ClpConfig.InputSource.LOCAL) { + Files.walkFileTree(decompressDir, new SimpleFileVisitor() { - if (exc == null) { - Files.delete(dir); + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException + { + Files.delete(file); return FileVisitResult.CONTINUE; } - else { - throw exc; // Directory iteration failed + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException + { + if (exc == null) { + Files.delete(dir); 
+ return FileVisitResult.CONTINUE; + } + else { + throw exc; // Directory iteration failed + } } - } - }); + }); + } } catch (IOException e) { log.error(e, "Failed to delete decompression directory"); @@ -189,7 +199,7 @@ public Set loadTableSchema(String tableName) connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); - String query = "SELECT * FROM" + config.getMetadataTablePrefix() + columnMetadataPrefix + tableName; + String query = "SELECT * FROM " + config.getMetadataTablePrefix() + columnMetadataPrefix + tableName; ResultSet resultSet = statement.executeQuery(query); while (resultSet.next()) { From 5a227c5f24374e1105c952c1085f9370de699a40 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 24 Jan 2025 11:15:42 -0500 Subject: [PATCH 069/126] update git submodule path --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index bb184984a4368..087837ce3098f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "presto-native-execution/velox"] path = presto-native-execution/velox - url = https://github.com/wraymo/velox.git - branch = clp_integration + url = https://github.com/y-scope/velox.git + branch = clp_integration_s3 From 387a3ed37f9b357bb97927322a278ffaa0207407 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 27 Jan 2025 15:38:05 -0500 Subject: [PATCH 070/126] only compile presto_server target --- presto-native-execution/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/Makefile b/presto-native-execution/Makefile index f3fb5f709f4d5..015fe22f7befb 100644 --- a/presto-native-execution/Makefile +++ b/presto-native-execution/Makefile @@ -94,7 +94,7 @@ release: #: Build the release version cmake-and-build: #: cmake and build without updating submodules which requires git cmake -B "$(BUILD_BASE_DIR)/$(BUILD_DIR)" $(FORCE_COLOR) 
$(CMAKE_FLAGS) $(EXTRA_CMAKE_FLAGS) - cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) -j $(NUM_THREADS) + cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) --target presto_server -j $(NUM_THREADS) unittest: debug #: Build with debugging and run unit tests cd $(BUILD_BASE_DIR)/debug && ctest -j $(NUM_THREADS) -VV --output-on-failure --exclude-regex velox.* From bd8defec9bf003f389c6560cb8abe35e5f4858be Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 27 Jan 2025 16:08:17 -0500 Subject: [PATCH 071/126] update worker configurations --- .../etc_worker/catalog/clp.properties | 12 +++++++++--- .../etc_worker/catalog/hive.properties | 1 + .../etc_worker/catalog/iceberg.properties | 1 + .../etc_worker/catalog/postgresql.properties | 3 --- .../etc_worker/catalog/tpchstandard.properties | 1 + presto-native-execution/etc_worker/config.properties | 10 +++++----- presto-native-execution/etc_worker/jvm.config | 10 ---------- presto-native-execution/etc_worker/log.properties | 1 - presto-native-execution/etc_worker/node.properties | 5 +++-- presto-native-execution/etc_worker/velox.properties | 1 + 10 files changed, 21 insertions(+), 24 deletions(-) create mode 100644 presto-native-execution/etc_worker/catalog/iceberg.properties delete mode 100644 presto-native-execution/etc_worker/catalog/postgresql.properties create mode 100644 presto-native-execution/etc_worker/catalog/tpchstandard.properties delete mode 100644 presto-native-execution/etc_worker/jvm.config delete mode 100644 presto-native-execution/etc_worker/log.properties create mode 100644 presto-native-execution/etc_worker/velox.properties diff --git a/presto-native-execution/etc_worker/catalog/clp.properties b/presto-native-execution/etc_worker/catalog/clp.properties index 0feade69e2944..db2a10bcd7649 100644 --- a/presto-native-execution/etc_worker/catalog/clp.properties +++ b/presto-native-execution/etc_worker/catalog/clp.properties @@ -1,4 +1,10 @@ connector.name=clp -executable-path=/root/clp/components/core/build/clp-s 
-archive-dir=/root/presto/presto-native-execution/clp_archive -polymorphic-type-enabled=true \ No newline at end of file +clp.metadata-db-host=localhost +clp.metadata-db-port=3306 +clp.metadata-db-user=raymo +clp.metadata-db-password=password +clp.metadata-db-name=clp_db +clp.metadata-table-prefix=clp_ +clp.input-source=s3 +clp.s3-bucket=https://example.amazonaws.com +clp.polymorphic-type-enabled=true \ No newline at end of file diff --git a/presto-native-execution/etc_worker/catalog/hive.properties b/presto-native-execution/etc_worker/catalog/hive.properties index 9109cb18ef86c..ee8abe93af853 100644 --- a/presto-native-execution/etc_worker/catalog/hive.properties +++ b/presto-native-execution/etc_worker/catalog/hive.properties @@ -2,3 +2,4 @@ connector.name=hive-hadoop2 hive.metastore=file hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog hive.parquet.use-column-names=true +file-column-names-read-as-lower-case=true diff --git a/presto-native-execution/etc_worker/catalog/iceberg.properties b/presto-native-execution/etc_worker/catalog/iceberg.properties new file mode 100644 index 0000000000000..f3a43dcb28126 --- /dev/null +++ b/presto-native-execution/etc_worker/catalog/iceberg.properties @@ -0,0 +1 @@ +connector.name=iceberg diff --git a/presto-native-execution/etc_worker/catalog/postgresql.properties b/presto-native-execution/etc_worker/catalog/postgresql.properties deleted file mode 100644 index 538a221ee015b..0000000000000 --- a/presto-native-execution/etc_worker/catalog/postgresql.properties +++ /dev/null @@ -1,3 +0,0 @@ -connector.name=postgresql -connection-url=jdbc:postgresql://localhost:5432/mydb -connection-user=raymo diff --git a/presto-native-execution/etc_worker/catalog/tpchstandard.properties b/presto-native-execution/etc_worker/catalog/tpchstandard.properties new file mode 100644 index 0000000000000..16e833ca8f436 --- /dev/null +++ b/presto-native-execution/etc_worker/catalog/tpchstandard.properties @@ -0,0 +1 @@ 
+connector.name=tpch \ No newline at end of file diff --git a/presto-native-execution/etc_worker/config.properties b/presto-native-execution/etc_worker/config.properties index 3a5190ddbd859..45bf71c311bb9 100644 --- a/presto-native-execution/etc_worker/config.properties +++ b/presto-native-execution/etc_worker/config.properties @@ -1,6 +1,6 @@ -coordinator=false +discovery.uri=http://127.0.0.1:8080 +presto.version=0.290-SNAPSHOT-18d3ea5 http-server.http.port=7777 -query.max-memory=4GB -query.max-memory-per-node=4GB -discovery.uri=http://localhost:8080 -regex-library=RE2J +shutdown-onset-sec=1 +register-test-functions=false +runtime-metrics-collection-enabled=false diff --git a/presto-native-execution/etc_worker/jvm.config b/presto-native-execution/etc_worker/jvm.config deleted file mode 100644 index 75403dd8a1a38..0000000000000 --- a/presto-native-execution/etc_worker/jvm.config +++ /dev/null @@ -1,10 +0,0 @@ --server --agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5007 --Xmx16G --XX:+UseG1GC --XX:G1HeapRegionSize=32M --XX:+UseGCOverheadLimit --XX:+ExplicitGCInvokesConcurrent --XX:+HeapDumpOnOutOfMemoryError --XX:+ExitOnOutOfMemoryError --Djdk.attach.allowAttachSelf=true diff --git a/presto-native-execution/etc_worker/log.properties b/presto-native-execution/etc_worker/log.properties deleted file mode 100644 index 3abc29ce3d86a..0000000000000 --- a/presto-native-execution/etc_worker/log.properties +++ /dev/null @@ -1 +0,0 @@ -com.facebook.presto=INFO \ No newline at end of file diff --git a/presto-native-execution/etc_worker/node.properties b/presto-native-execution/etc_worker/node.properties index 6a2e3ec052a6c..bc1c85cbded2a 100644 --- a/presto-native-execution/etc_worker/node.properties +++ b/presto-native-execution/etc_worker/node.properties @@ -1,3 +1,4 @@ node.environment=production -node.id=worker -node.data-dir=/root/presto/presto-native-execution/data_worker \ No newline at end of file +node.internal-address=127.0.0.1 
+node.location=testing-location +node.id=worker-1 \ No newline at end of file diff --git a/presto-native-execution/etc_worker/velox.properties b/presto-native-execution/etc_worker/velox.properties new file mode 100644 index 0000000000000..6c2506bd99a8e --- /dev/null +++ b/presto-native-execution/etc_worker/velox.properties @@ -0,0 +1 @@ +mutable-config=true \ No newline at end of file From cbe0786bc63dac4fc27b325b6f05a620421ffb5e Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 28 Jan 2025 12:19:34 -0500 Subject: [PATCH 072/126] do not treat warnings as errors --- presto-native-execution/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/Makefile b/presto-native-execution/Makefile index 015fe22f7befb..cf9a7a63ce747 100644 --- a/presto-native-execution/Makefile +++ b/presto-native-execution/Makefile @@ -14,7 +14,7 @@ BUILD_BASE_DIR=_build BUILD_DIR=release BUILD_TYPE=Release -TREAT_WARNINGS_AS_ERRORS ?= 1 +TREAT_WARNINGS_AS_ERRORS = 0 ENABLE_WALL ?= 1 NUM_THREADS ?= $(shell getconf _NPROCESSORS_CONF 2>/dev/null || echo 1) CPU_TARGET ?= "avx" From ad9f5156fa71dfc6f57abb3c32473a728dcc327a Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 4 Feb 2025 19:54:15 -0500 Subject: [PATCH 073/126] add an s3 coordinator configuration example --- etc_coordinator_s3/catalog/clp.properties | 10 ++++++++++ etc_coordinator_s3/config.properties | 15 +++++++++++++++ etc_coordinator_s3/jvm.config | 10 ++++++++++ etc_coordinator_s3/log.properties | 1 + etc_coordinator_s3/node.properties | 2 ++ 5 files changed, 38 insertions(+) create mode 100644 etc_coordinator_s3/catalog/clp.properties create mode 100644 etc_coordinator_s3/config.properties create mode 100644 etc_coordinator_s3/jvm.config create mode 100644 etc_coordinator_s3/log.properties create mode 100644 etc_coordinator_s3/node.properties diff --git a/etc_coordinator_s3/catalog/clp.properties b/etc_coordinator_s3/catalog/clp.properties new file mode 100644 index 
0000000000000..db2a10bcd7649 --- /dev/null +++ b/etc_coordinator_s3/catalog/clp.properties @@ -0,0 +1,10 @@ +connector.name=clp +clp.metadata-db-host=localhost +clp.metadata-db-port=3306 +clp.metadata-db-user=raymo +clp.metadata-db-password=password +clp.metadata-db-name=clp_db +clp.metadata-table-prefix=clp_ +clp.input-source=s3 +clp.s3-bucket=https://example.amazonaws.com +clp.polymorphic-type-enabled=true \ No newline at end of file diff --git a/etc_coordinator_s3/config.properties b/etc_coordinator_s3/config.properties new file mode 100644 index 0000000000000..a90dca3192543 --- /dev/null +++ b/etc_coordinator_s3/config.properties @@ -0,0 +1,15 @@ +coordinator=true +node-scheduler.include-coordinator=false +http-server.http.port=8080 +query.max-memory=1GB +query.max-memory-per-node=1GB +discovery-server.enabled=true +discovery.uri=http://localhost:8080 +#task.max-worker-threads=1 +#task.concurrency=1 +experimental.internal-communication.thrift-transport-enabled=true +optimizer.optimize-hash-generation=false +regex-library=RE2J +use-alternative-function-signatures=true +inline-sql-functions=false +nested-data-serialization-enabled=false \ No newline at end of file diff --git a/etc_coordinator_s3/jvm.config b/etc_coordinator_s3/jvm.config new file mode 100644 index 0000000000000..1821127484229 --- /dev/null +++ b/etc_coordinator_s3/jvm.config @@ -0,0 +1,10 @@ +-server +-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5006 +-Xmx4G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:+ExitOnOutOfMemoryError +-Djdk.attach.allowAttachSelf=true diff --git a/etc_coordinator_s3/log.properties b/etc_coordinator_s3/log.properties new file mode 100644 index 0000000000000..ccde82c2d8c33 --- /dev/null +++ b/etc_coordinator_s3/log.properties @@ -0,0 +1 @@ +com.facebook.presto=DEBUG \ No newline at end of file diff --git a/etc_coordinator_s3/node.properties 
b/etc_coordinator_s3/node.properties new file mode 100644 index 0000000000000..ab655ca2202ea --- /dev/null +++ b/etc_coordinator_s3/node.properties @@ -0,0 +1,2 @@ +node.environment=production +node.id=coordinator \ No newline at end of file From 91b36f01936fe35a60a8567063cd7275a85a0118 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 4 Feb 2025 21:11:15 -0500 Subject: [PATCH 074/126] update velox dependency and modify example configuration files --- etc_coordinator_s3/catalog/clp.properties | 7 ++++--- presto-native-execution/etc_worker/catalog/clp.properties | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/etc_coordinator_s3/catalog/clp.properties b/etc_coordinator_s3/catalog/clp.properties index db2a10bcd7649..86439329be564 100644 --- a/etc_coordinator_s3/catalog/clp.properties +++ b/etc_coordinator_s3/catalog/clp.properties @@ -1,10 +1,11 @@ connector.name=clp clp.metadata-db-host=localhost clp.metadata-db-port=3306 -clp.metadata-db-user=raymo +clp.metadata-db-user=clp-user clp.metadata-db-password=password -clp.metadata-db-name=clp_db +clp.metadata-db-name=clp-db clp.metadata-table-prefix=clp_ clp.input-source=s3 -clp.s3-bucket=https://example.amazonaws.com +clp.s3-bucket=https://example.s3.us-east-1.amazonaws.com +clp.s3-key-prefix=test/ clp.polymorphic-type-enabled=true \ No newline at end of file diff --git a/presto-native-execution/etc_worker/catalog/clp.properties b/presto-native-execution/etc_worker/catalog/clp.properties index db2a10bcd7649..86439329be564 100644 --- a/presto-native-execution/etc_worker/catalog/clp.properties +++ b/presto-native-execution/etc_worker/catalog/clp.properties @@ -1,10 +1,11 @@ connector.name=clp clp.metadata-db-host=localhost clp.metadata-db-port=3306 -clp.metadata-db-user=raymo +clp.metadata-db-user=clp-user clp.metadata-db-password=password -clp.metadata-db-name=clp_db +clp.metadata-db-name=clp-db clp.metadata-table-prefix=clp_ clp.input-source=s3 -clp.s3-bucket=https://example.amazonaws.com 
+clp.s3-bucket=https://example.s3.us-east-1.amazonaws.com +clp.s3-key-prefix=test/ clp.polymorphic-type-enabled=true \ No newline at end of file From 985adb58247e1ebe36a2e73215555db5437271df Mon Sep 17 00:00:00 2001 From: rwang22 Date: Wed, 26 Feb 2025 14:44:56 +0000 Subject: [PATCH 075/126] remove connectorFactory variable from ClpPlugin --- .../src/main/java/com/yscope/presto/ClpPlugin.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java index f9325e0698703..97d86dc0c2cba 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java @@ -20,16 +20,9 @@ public class ClpPlugin implements Plugin { - private final ConnectorFactory connectorFactory; - - public ClpPlugin() - { - connectorFactory = new ClpConnectorFactory(); - } - @Override public Iterable getConnectorFactories() { - return ImmutableList.of(connectorFactory); + return ImmutableList.of(new ClpConnectorFactory()); } } From c77e33b67442a3436d33d9847129853d5c0eb16c Mon Sep 17 00:00:00 2001 From: rwang22 Date: Wed, 26 Feb 2025 14:46:39 +0000 Subject: [PATCH 076/126] remove parseSchemaFile from ClpClient --- .../java/com/yscope/presto/ClpClient.java | 58 ------------------- 1 file changed, 58 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 3b96f030d23b8..dd2bf76fa19b9 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -409,64 +409,6 @@ private boolean decompressRecords(String tableName) } } - private Set parseSchemaTreeFile(Path schemaMapsFile) - { - SchemaTree schemaTree = new SchemaTree(); - try (InputStream fileInputStream = Files.newInputStream(schemaMapsFile); - ZstdInputStream zstdInputStream = new 
ZstdInputStream(fileInputStream); - DataInputStream dataInputStream = new DataInputStream(zstdInputStream)) { - byte[] longBytes = new byte[8]; - byte[] intBytes = new byte[4]; - dataInputStream.readFully(longBytes); - long numberOfNodes = ByteBuffer.wrap(longBytes).order(ByteOrder.nativeOrder()).getLong(); - for (int i = 0; i < numberOfNodes; i++) { - dataInputStream.readFully(intBytes); - int parentId = ByteBuffer.wrap(intBytes).order(ByteOrder.nativeOrder()).getInt(); - dataInputStream.readFully(longBytes); - long stringSize = ByteBuffer.wrap(longBytes).order(ByteOrder.nativeOrder()).getLong(); - byte[] stringBytes = new byte[(int) stringSize]; - dataInputStream.readFully(stringBytes); - String name = new String(stringBytes, StandardCharsets.UTF_8); - SchemaNode.NodeType type = SchemaNode.NodeType.fromType(dataInputStream.readByte()); - schemaTree.addNode(parentId, name, type); - } - - ArrayList primitiveTypeFields = schemaTree.getPrimitiveFields(); - LinkedHashSet columnHandles = new LinkedHashSet<>(); - for (SchemaNode.NodeTuple nodeTuple : primitiveTypeFields) { - SchemaNode.NodeType nodeType = nodeTuple.getType(); - Type prestoType = null; - switch (nodeType) { - case Integer: - prestoType = BigintType.BIGINT; - break; - case Float: - prestoType = DoubleType.DOUBLE; - break; - case ClpString: - case VarString: - case DateString: - case NullValue: - prestoType = VarcharType.VARCHAR; - break; - case UnstructuredArray: - prestoType = new ArrayType(VarcharType.VARCHAR); - break; - case Boolean: - prestoType = BooleanType.BOOLEAN; - break; - default: - break; - } - columnHandles.add(new ClpColumnHandle(nodeTuple.getName(), prestoType, true)); - } - return columnHandles; - } - catch (IOException e) { - return ImmutableSet.of(); - } - } - private Set handlePolymorphicType(Set columnHandles) { Map> columnNameToColumnHandles = new HashMap<>(); From 9bbbe5c23ef058ee2b117fc5e9e1186b0fad3688 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Fri, 28 Feb 2025 01:49:41 +0000 
Subject: [PATCH 077/126] refactor metadata api, delete uncessary files and refactor the test --- .../java/com/yscope/presto/ClpClient.java | 189 ++------------- .../java/com/yscope/presto/ClpConnector.java | 22 +- .../presto/ClpFilterToKqlConverter.java | 4 - .../java/com/yscope/presto/ClpMetadata.java | 52 ++-- .../com/yscope/presto/ClpPlanOptimizer.java | 8 +- .../presto/ClpPlanOptimizerProvider.java | 8 +- .../com/yscope/presto/ClpRecordCursor.java | 224 ------------------ .../java/com/yscope/presto/ClpRecordSet.java | 50 ---- .../yscope/presto/ClpRecordSetProvider.java | 61 ----- .../com/yscope/presto/ClpTableHandle.java | 17 +- ...nverter.java => TestClpPlanOptimizer.java} | 58 ++++- .../yscope/presto/TestClpRecordCursor.java | 167 ------------- 12 files changed, 126 insertions(+), 734 deletions(-) delete mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java delete mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java delete mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java rename presto-clp/src/test/java/com/yscope/presto/{TestClpFilterToKqlConverter.java => TestClpPlanOptimizer.java} (82%) delete mode 100644 presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index dd2bf76fa19b9..40c0a4e7165f9 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -20,32 +20,19 @@ import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; -import com.github.luben.zstd.ZstdInputStream; +import com.facebook.presto.spi.SchemaTableName; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import 
com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.yscope.presto.schema.SchemaNode; -import com.yscope.presto.schema.SchemaTree; - -import javax.annotation.PostConstruct; -import javax.annotation.PreDestroy; import javax.inject.Inject; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; -import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.nio.file.SimpleFileVisitor; -import java.nio.file.attribute.BasicFileAttributes; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; @@ -56,7 +43,6 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import static java.util.Objects.requireNonNull; @@ -64,37 +50,31 @@ public class ClpClient { - public static final String columnMetadataPrefix = "column_metadata_"; - public static final String archiveTableSuffix = "archives"; + private static final String COLUMN_METADATA_PREFIX = "column_metadata_"; + private static final String ARCHIVE_TABLE_SUFFIX = "archives"; private static final Logger log = Logger.get(ClpClient.class); + private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM %s" + COLUMN_METADATA_PREFIX + "?"; + private static final String QUERY_SHOW_TABLES = "SHOW TABLES"; + private static final String QUERY_SELECT_ARCHIVE_IDS = "SELECT id FROM %s" + ARCHIVE_TABLE_SUFFIX; + private final ClpConfig config; private final String metadataDbUrl; private final ClpConfig.InputSource inputSource; - private final Path executablePath; - private final Path decompressDir; - private final LoadingCache> columnHandleCache; + private final LoadingCache> columnHandleCache; private final LoadingCache> tableNameCache; 
@Inject public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); - this.metadataDbUrl = "jdbc:mysql://" + config.getMetadataDbHost() + ":" + config.getMetadataDbPort() + "/" + config.getMetadataDbName(); try { Class.forName("com.mysql.jdbc.Driver"); } catch (ClassNotFoundException e) { log.error(e, "Failed to load MySQL JDBC driver"); } + this.metadataDbUrl = "jdbc:mysql://" + config.getMetadataDbHost() + ":" + config.getMetadataDbPort() + "/" + config.getMetadataDbName(); this.inputSource = config.getInputSource(); - if (inputSource == ClpConfig.InputSource.LOCAL) { - this.executablePath = getExecutablePath(); - this.decompressDir = Paths.get(System.getProperty("java.io.tmpdir"), "clp_decompress"); - } - else { - this.executablePath = null; - this.decompressDir = null; - } this.columnHandleCache = CacheBuilder.newBuilder() .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) @@ -106,100 +86,21 @@ public ClpClient(ClpConfig config) .build(CacheLoader.from(this::loadTable)); } - @PostConstruct - public void start() - { - try { - if (inputSource == ClpConfig.InputSource.LOCAL) { - Files.createDirectories(decompressDir); - } - } - catch (IOException e) { - log.error(e, "Failed to create decompression directory"); - } - } - - @PreDestroy - public void close() - { - try { - if (inputSource == ClpConfig.InputSource.LOCAL) { - Files.walkFileTree(decompressDir, new SimpleFileVisitor() - { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException - { - Files.delete(file); - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException - { - if (exc == null) { - Files.delete(dir); - return FileVisitResult.CONTINUE; - } - else { - throw exc; // Directory iteration failed - } - } - }); - } - } - catch (IOException e) { - log.error(e, "Failed to 
delete decompression directory"); - } - } - public ClpConfig getConfig() { return config; } - private Path getExecutablePath() + public Set loadTableSchema(SchemaTableName schemaTableName) { - String executablePathString = config.getClpExecutablePath(); - if (executablePathString == null || executablePathString.isEmpty()) { - Path executablePath = getExecutablePathFromEnvironment(); - if (executablePath == null) { - throw new RuntimeException("CLP executable path is not set"); - } - return executablePath; - } - Path executablePath = Paths.get(executablePathString); - if (!Files.exists(executablePath) || !Files.isRegularFile(executablePath)) { - executablePath = getExecutablePathFromEnvironment(); - if (executablePath == null) { - throw new RuntimeException("CLP executable path is not set"); - } - } - return executablePath; - } - - private Path getExecutablePathFromEnvironment() - { - String executablePathString = System.getenv("CLP_EXECUTABLE_PATH"); - if (executablePathString == null || executablePathString.isEmpty()) { - return null; - } - - Path executablePath = Paths.get(executablePathString); - if (!Files.exists(executablePath) || !Files.isRegularFile(executablePath)) { - return null; - } - return executablePath; - } + String query = "SELECT * FROM " + config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX + schemaTableName.getTableName(); - public Set loadTableSchema(String tableName) - { Connection connection = null; LinkedHashSet columnHandles = new LinkedHashSet<>(); try { connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); - String query = "SELECT * FROM " + config.getMetadataTablePrefix() + columnMetadataPrefix + tableName; ResultSet resultSet = statement.executeQuery(query); while (resultSet.next()) { @@ -264,7 +165,7 @@ public Set loadTable(String schemaName) // Processing the results String databaseName = config.getMetadataDbName(); - 
String tableNamePrefix = config.getMetadataTablePrefix() + columnMetadataPrefix; + String tableNamePrefix = config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX; while (resultSet.next()) { String tableName = resultSet.getString("Tables_in_" + databaseName); if (tableName.startsWith(config.getMetadataTablePrefix()) && tableName.length() > tableNamePrefix.length()) { @@ -289,9 +190,9 @@ public Set loadTable(String schemaName) return tableNames.build(); } - public Set listTables() + public Set listTables(String schemaName) { - return tableNameCache.getUnchecked("default"); + return tableNameCache.getUnchecked(schemaName); } public List listArchiveIds(String tableName) @@ -316,13 +217,12 @@ public List listArchiveIds(String tableName) } } else { - String bucketName = config.getS3Bucket(); Connection connection = null; try { connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); Statement statement = connection.createStatement(); - String query = "SELECT id FROM " + config.getMetadataTablePrefix() + archiveTableSuffix; + String query = "SELECT id FROM " + config.getMetadataTablePrefix() + ARCHIVE_TABLE_SUFFIX; ResultSet resultSet = statement.executeQuery(query); ImmutableList.Builder archiveIds = ImmutableList.builder(); @@ -349,64 +249,9 @@ public List listArchiveIds(String tableName) } } - public Set listColumns(String tableName) + public Set listColumns(SchemaTableName schemaTableName) { - return columnHandleCache.getUnchecked(tableName); - } - - public ProcessBuilder getRecords(String tableName, String archiveId, Optional query, List columns) - { - if (!listTables().contains(tableName)) { - return null; - } - - return query.map(s -> searchTable(tableName, archiveId, s, columns)) - .orElseGet(() -> searchTable(tableName, archiveId, "*", columns)); - } - - private ProcessBuilder searchTable(String tableName, String archiveId, String query, List columns) - { - if (inputSource == ClpConfig.InputSource.S3) 
{ - throw new RuntimeException("Cannot handle S3 source"); - } - Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); - List argumentList = new ArrayList<>(); - argumentList.add(executablePath.toString()); - argumentList.add("s"); - argumentList.add(tableArchiveDir.toString()); - argumentList.add("--archive-id"); - argumentList.add(archiveId); - argumentList.add(query); - if (!columns.isEmpty()) { - argumentList.add("--projection"); - argumentList.addAll(columns); - } - log.info("Argument list: %s", argumentList.toString()); - return new ProcessBuilder(argumentList); - } - - private boolean decompressRecords(String tableName) - { - if (inputSource == ClpConfig.InputSource.S3) { - throw new RuntimeException("Cannot handle S3 source"); - } - Path tableDecompressDir = decompressDir.resolve(tableName); - Path tableArchiveDir = Paths.get(config.getClpArchiveDir(), tableName); - - try { - ProcessBuilder processBuilder = - new ProcessBuilder(executablePath.toString(), - "x", - tableArchiveDir.toString(), - tableDecompressDir.toString()); - Process process = processBuilder.start(); - process.waitFor(); - return process.exitValue() == 0; - } - catch (IOException | InterruptedException e) { - log.error(e, "Failed to decompress records for table %s", tableName); - return false; - } + return columnHandleCache.getUnchecked(schemaTableName); } private Set handlePolymorphicType(Set columnHandles) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java index 25b3c9e6f241c..b081abe7eb2e4 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java @@ -15,16 +15,13 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.spi.connector.Connector; import 
com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; -import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; import com.facebook.presto.spi.function.FunctionMetadataManager; import com.facebook.presto.spi.function.StandardFunctionResolution; -import com.facebook.presto.spi.relation.RowExpressionService; import com.facebook.presto.spi.transaction.IsolationLevel; import javax.inject.Inject; @@ -39,36 +36,27 @@ public class ClpConnector private final LifeCycleManager lifeCycleManager; private final ClpMetadata metadata; private final ClpSplitManager splitManager; - private final ClpRecordSetProvider recordSetProvider; private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final RowExpressionService rowExpressionService; - private final TypeManager typeManager; @Inject public ClpConnector(LifeCycleManager lifeCycleManager, ClpMetadata metadata, ClpSplitManager splitManager, - ClpRecordSetProvider recordSetProvider, FunctionMetadataManager functionManager, - StandardFunctionResolution functionResolution, - RowExpressionService rowExpressionService, - TypeManager typeManager) + StandardFunctionResolution functionResolution) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); this.splitManager = requireNonNull(splitManager, "splitManager is null"); - this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.rowExpressionService = requireNonNull(rowExpressionService, 
"rowExpressionService is null"); - this.typeManager = requireNonNull(typeManager, "typeManager is null"); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(functionManager, functionResolution, typeManager); + return new ClpPlanOptimizerProvider(functionManager, functionResolution); } @Override @@ -89,12 +77,6 @@ public ConnectorSplitManager getSplitManager() return splitManager; } - @Override - public ConnectorRecordSetProvider getRecordSetProvider() - { - return recordSetProvider; - } - @Override public final void shutdown() { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 2360485910b13..e76f22c5efe47 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -15,7 +15,6 @@ import com.facebook.presto.common.function.OperatorType; import com.facebook.presto.common.type.Type; -import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.PrestoException; @@ -49,18 +48,15 @@ public class ClpFilterToKqlConverter private final StandardFunctionResolution standardFunctionResolution; private final FunctionMetadataManager functionMetadataManager; - private final TypeManager typeManager; private final Map assignments; public ClpFilterToKqlConverter(StandardFunctionResolution standardFunctionResolution, FunctionMetadataManager functionMetadataManager, - TypeManager typeManager, Map assignments) { this.standardFunctionResolution = requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); this.functionMetadataManager = requireNonNull(functionMetadataManager, "function metadata manager is null"); - this.typeManager = 
requireNonNull(typeManager, "type manager is null"); this.assignments = requireNonNull(assignments, "assignments is null"); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index e73983a51ba17..3cc37900f5f89 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -34,11 +34,15 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Function; + +import static java.util.Objects.requireNonNull; public class ClpMetadata implements ConnectorMetadata { private final ClpClient clpClient; + private static final String DEFAULT_SCHEMA_NAME = "default"; @Inject public ClpMetadata(ClpClient clpClient) @@ -49,29 +53,35 @@ public ClpMetadata(ClpClient clpClient) @Override public List listSchemaNames(ConnectorSession session) { - return ImmutableList.of("default"); + return ImmutableList.of(DEFAULT_SCHEMA_NAME); } @Override public List listTables(ConnectorSession session, Optional schemaName) { - return clpClient.listTables().stream() - .map(tableName -> new SchemaTableName("default", tableName)) + String schemaNameValue = schemaName.orElse(DEFAULT_SCHEMA_NAME); + if (!listSchemaNames(session).contains(schemaNameValue)) { + return ImmutableList.of(); + } + + return clpClient.listTables(schemaNameValue).stream() + .map(tableName -> new SchemaTableName(schemaNameValue, tableName)) .collect(ImmutableList.toImmutableList()); } @Override public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) { - if (!listSchemaNames(session).contains(tableName.getSchemaName())) { + String schemaName = tableName.getSchemaName(); + if (!listSchemaNames(session).contains(schemaName)) { return null; } - if (!clpClient.listTables().contains(tableName.getTableName())) { + if (!clpClient.listTables(schemaName).contains(tableName.getTableName())) { 
return null; } - return new ClpTableHandle(tableName.getTableName()); + return new ClpTableHandle(tableName); } @Override @@ -95,30 +105,44 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { ClpTableHandle clpTableHandle = (ClpTableHandle) table; - String tableName = clpTableHandle.getTableName(); - List columns = clpClient.listColumns(tableName).stream() + SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); + List columns = clpClient.listColumns(schemaTableName).stream() .map(ClpColumnHandle::getColumnMetadata) .collect(ImmutableList.toImmutableList()); - return new ConnectorTableMetadata(new SchemaTableName("default", tableName), columns); + return new ConnectorTableMetadata(schemaTableName, columns); } @Override public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { - return clpClient.listTables().stream() + requireNonNull(prefix, "prefix is null"); + String schemaName = prefix.getSchemaName(); + if (schemaName != null && !schemaName.equals(DEFAULT_SCHEMA_NAME)) { + return ImmutableMap.of(); + } + + List schemaTableNames; + if (prefix.getTableName() == null) { + schemaTableNames = listTables(session, Optional.of(prefix.getSchemaName())); + } + else { + schemaTableNames = ImmutableList.of(new SchemaTableName(prefix.getSchemaName(), prefix.getTableName())); + } + + return schemaTableNames.stream() .collect(ImmutableMap.toImmutableMap( - tableName -> new SchemaTableName("default", tableName), - tableName -> getTableMetadata(session, - new ClpTableHandle(tableName)).getColumns())); + Function.identity(), + tableName -> getTableMetadata(session, getTableHandle(session, tableName)).getColumns() + )); } @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle; - return 
clpClient.listColumns(clpTableHandle.getTableName()).stream() + return clpClient.listColumns(clpTableHandle.getSchemaTableName()).stream() .collect(ImmutableMap.toImmutableMap( ClpColumnHandle::getColumnName, column -> column)); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 2bdd9731df8cc..0de5c10889a01 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -14,7 +14,6 @@ package com.yscope.presto; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; @@ -41,15 +40,12 @@ public class ClpPlanOptimizer private static final Logger log = Logger.get(ClpPlanOptimizer.class); private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final TypeManager typeManager; public ClpPlanOptimizer(FunctionMetadataManager functionManager, - StandardFunctionResolution functionResolution, - TypeManager typeManager) + StandardFunctionResolution functionResolution) { this.functionManager = functionManager; this.functionResolution = functionResolution; - this.typeManager = typeManager; } @Override @@ -83,7 +79,7 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); ClpExpression clpExpression = node.getPredicate() - .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, typeManager, assignments), + .accept(new ClpFilterToKqlConverter(functionResolution, functionManager, assignments), null); Optional kqlQuery = clpExpression.getDefinition(); Optional remainingPredicate 
= clpExpression.getRemainingExpression(); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java index 73c5c695aa0f0..5b204fe46569a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java @@ -13,7 +13,6 @@ */ package com.yscope.presto; -import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -29,16 +28,13 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final TypeManager typeManager; @Inject public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, - StandardFunctionResolution functionResolution, - TypeManager typeManager) + StandardFunctionResolution functionResolution) { this.functionManager = functionManager; this.functionResolution = functionResolution; - this.typeManager = typeManager; } @Override @@ -50,6 +46,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution, typeManager)); + return ImmutableSet.of(new ClpPlanOptimizer(functionManager, functionResolution)); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java deleted file mode 100644 index aa5d2fa38fd77..0000000000000 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordCursor.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto; - -import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.block.BlockBuilder; -import com.facebook.presto.common.type.Type; -import com.facebook.presto.spi.RecordCursor; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import io.airlift.slice.Slice; -import io.airlift.slice.Slices; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import static com.facebook.presto.common.type.BigintType.BIGINT; -import static com.facebook.presto.common.type.BooleanType.BOOLEAN; -import static com.facebook.presto.common.type.DoubleType.DOUBLE; -import static com.facebook.presto.common.type.VarcharType.VARCHAR; -import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType; -import static com.google.common.base.Preconditions.checkArgument; - -public class ClpRecordCursor - implements RecordCursor -{ - private static final Logger log = Logger.get(ClpRecordCursor.class); - private final BufferedReader reader; - private final Process process; - private final boolean isPolymorphicTypeEnabled; - private final List columnHandles; - private final List fields; - - public ClpRecordCursor(ProcessBuilder processBuilder, boolean isPolymorphicTypeEnabled, List columnHandles) - { - try { - this.process = processBuilder.start(); - } - catch (IOException e) { - log.error(e, "Failed to search records"); 
- throw new RuntimeException(e); - } - this.reader = new BufferedReader(new InputStreamReader(process.getInputStream())); - this.isPolymorphicTypeEnabled = isPolymorphicTypeEnabled; - this.columnHandles = columnHandles; - this.fields = new ArrayList<>(columnHandles.size()); - for (int i = 0; i < columnHandles.size(); i++) { - fields.add(null); - } - } - - @Override - public long getCompletedBytes() - { - return 0; - } - - @Override - public long getReadTimeNanos() - { - return 0; - } - - @Override - public Type getType(int field) - { - return columnHandles.get(field).getColumnType(); - } - - @Override - public boolean advanceNextPosition() - { - try { - String line = reader.readLine(); - if (line == null) { - return false; - } - fields.replaceAll(ignored -> null); - JsonNode node = new ObjectMapper().readTree(line); - parseLine(node, ""); - } - catch (Exception e) { - return false; - } - - return true; - } - - private void checkFieldType(int field, Type expected) - { - Type actual = getType(field); - checkArgument(actual.equals(expected), "Expected field %s to be type %s but is %s", field, expected, actual); - } - - @Override - public boolean getBoolean(int field) - { - checkFieldType(field, BOOLEAN); - return fields.get(field).asBoolean(); - } - - @Override - public long getLong(int field) - { - checkFieldType(field, BIGINT); - return fields.get(field).asLong(); - } - - @Override - public double getDouble(int field) - { - checkFieldType(field, DOUBLE); - return fields.get(field).asDouble(); - } - - @Override - public Slice getSlice(int field) - { - checkFieldType(field, createUnboundedVarcharType()); - JsonNode node = fields.get(field); - return Slices.utf8Slice(node.asText()); - } - - @Override - public Object getObject(int field) - { - JsonNode node = fields.get(field); - if (node.isArray()) { - BlockBuilder builder = VARCHAR.createBlockBuilder(null, node.size()); - Iterator elements = node.elements(); - while (elements.hasNext()) { - 
VARCHAR.writeString(builder, elements.next().asText()); - } - return builder.build(); - } - throw new UnsupportedOperationException(); - } - - @Override - public boolean isNull(int field) - { - return fields.get(field) == null || fields.get(field).isNull(); - } - - @Override - public void close() - { - try { - reader.close(); - } - catch (IOException e) { - log.warn(e, "Failed to close reader"); - } - process.destroy(); - } - - private void parseLine(JsonNode node, String prefix) - { - if (node.isObject()) { - Iterator> fields = node.fields(); - while (fields.hasNext()) { - Map.Entry field = fields.next(); - String key = field.getKey(); - JsonNode value = field.getValue(); - parseLine(value, prefix.isEmpty() ? key : prefix + "." + key); - } - } - else { - int index = getFieldIndex(prefix, node); - if (index == -1) { - return; - } - fields.set(index, node); - } - } - - private String jsonNodeToTypeString(JsonNode node) - { - if (node.isIntegralNumber()) { - return BIGINT.getDisplayName(); - } - if (node.isFloatingPointNumber()) { - return DOUBLE.getDisplayName(); - } - if (node.isBoolean()) { - return BOOLEAN.getDisplayName(); - } - if (node.isTextual() || node.isArray() || node.isNull()) { - return VARCHAR.getDisplayName(); - } - return "unknown"; - } - - private int getFieldIndex(String fieldName, JsonNode node) - { - for (int i = 0; i < columnHandles.size(); i++) { - if (columnHandles.get(i).getColumnName().equals(fieldName)) { - return i; - } - - if (isPolymorphicTypeEnabled && (fieldName + "_" + jsonNodeToTypeString(node)).equals(columnHandles.get(i) - .getColumnName())) { - return i; - } - } - return -1; - } -} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java deleted file mode 100644 index f561d0b0d2c2e..0000000000000 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSet.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto; - -import com.facebook.presto.common.type.Type; -import com.facebook.presto.spi.RecordCursor; -import com.facebook.presto.spi.RecordSet; -import com.google.common.collect.ImmutableList; - -import java.util.List; - -import static java.util.Objects.requireNonNull; - -public class ClpRecordSet - implements RecordSet -{ - private final ProcessBuilder processBuilder; - private final List columnHandles; - private final boolean isPolymorphicTypeEnabled; - - public ClpRecordSet(ProcessBuilder processBuilder, boolean isPolymorphicTypeEnabled, List columnHandles) - { - this.processBuilder = requireNonNull(processBuilder, "process builder is null"); - this.isPolymorphicTypeEnabled = isPolymorphicTypeEnabled; - this.columnHandles = requireNonNull(columnHandles, "column handles is null"); - } - - @Override - public List getColumnTypes() - { - return columnHandles.stream().map(ClpColumnHandle::getColumnType).collect(ImmutableList.toImmutableList()); - } - - @Override - public RecordCursor cursor() - { - return new ClpRecordCursor(processBuilder, isPolymorphicTypeEnabled, columnHandles); - } -} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java b/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java deleted file mode 100644 index 84070d1387e9b..0000000000000 --- a/presto-clp/src/main/java/com/yscope/presto/ClpRecordSetProvider.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed under the Apache License, 
Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto; - -import com.facebook.presto.spi.ColumnHandle; -import com.facebook.presto.spi.ConnectorSession; -import com.facebook.presto.spi.ConnectorSplit; -import com.facebook.presto.spi.RecordSet; -import com.facebook.presto.spi.connector.ConnectorRecordSetProvider; -import com.facebook.presto.spi.connector.ConnectorTransactionHandle; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; - -import javax.inject.Inject; - -import java.util.List; - -public class ClpRecordSetProvider - implements ConnectorRecordSetProvider -{ - ClpClient clpClient; - - @Inject - public ClpRecordSetProvider(ClpClient clpClient) - { - this.clpClient = clpClient; - } - - @Override - public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, - ConnectorSession session, - ConnectorSplit split, - List columns) - { - ClpSplit clpSplit = (ClpSplit) split; - ImmutableList.Builder handles = ImmutableList.builder(); - for (ColumnHandle handle : columns) { - handles.add((ClpColumnHandle) handle); - } - ImmutableList clpColumnHandles = handles.build(); - return new ClpRecordSet(clpClient.getRecords(clpSplit.getTableName(), - clpSplit.getArchiveId(), - clpSplit.getQuery(), - clpColumnHandles.stream() - .map(ClpColumnHandle::getOriginalColumnName) - .collect(ImmutableSet.toImmutableSet()) - .asList()), - clpClient.getConfig().isPolymorphicTypeEnabled(), clpColumnHandles); - } -} diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java index f0a179d9d49b0..473b734e100ea 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java @@ -14,6 +14,7 @@ package com.yscope.presto; import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.SchemaTableName; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -22,24 +23,24 @@ public class ClpTableHandle implements ConnectorTableHandle { - private final String tableName; + private final SchemaTableName schemaTableName; @JsonCreator - public ClpTableHandle(@JsonProperty("tableName") String tableName) + public ClpTableHandle(@JsonProperty("schemaTableName") SchemaTableName schemaTableName) { - this.tableName = tableName; + this.schemaTableName = schemaTableName; } @JsonProperty - public String getTableName() + public SchemaTableName getSchemaTableName() { - return tableName; + return schemaTableName; } @Override public int hashCode() { - return Objects.hash(tableName); + return Objects.hash(schemaTableName); } @Override @@ -52,12 +53,12 @@ public boolean equals(Object obj) return false; } ClpTableHandle other = (ClpTableHandle) obj; - return this.tableName.equals(other.tableName); + return this.schemaTableName.equals(other.schemaTableName); } @Override public String toString() { - return tableName; + return schemaTableName.toString(); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java similarity index 82% rename from presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java rename to presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index 8ca347435f94e..fce3dd2883f8a 100644 --- 
a/presto-clp/src/test/java/com/yscope/presto/TestClpFilterToKqlConverter.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -16,6 +16,7 @@ import com.facebook.presto.common.type.BigintType; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorId; import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.SpecialFormExpression; @@ -31,7 +32,10 @@ import static com.facebook.presto.common.function.OperatorType.EQUAL; import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.TimestampType.TIMESTAMP; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.metadata.CastType.CAST; import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; @@ -43,8 +47,59 @@ import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) -public class TestClpFilterToKqlConverter +public class TestClpPlanOptimizer { + private static ConnectorId druidConnectorId = new ConnectorId("id"); + private static ClpTableHandle realtimeOnlyTable = new ClpTableHandle("schema", "realtimeOnly", Optional.empty()); + private static ClpTableHandle hybridTable = new ClpTableHandle("schema", "hybrid", Optional.empty()); + private static ClpColumnHandle regionId = new ClpColumnHandle("region.Id", BIGINT, REGULAR); + private static ClpColumnHandle city = new ClpColumnHandle("city", VARCHAR, REGULAR); + private static final ClpColumnHandle fare = new ClpColumnHandle("fare", DOUBLE, REGULAR); + private static final ClpColumnHandle 
secondsSinceEpoch = new ClpColumnHandle("secondsSinceEpoch", BIGINT, REGULAR); + private static final ClpColumnHandle datetime = new ClpColumnHandle("datetime", TIMESTAMP, REGULAR); + + @Test + public void testStringMatchPushdown() { + + } + + @Test + public void testNumericComparisonPushdown() { + + } + + @Test + public void testOrPushdown() { + + } + + @Test + public void testAndPushdown() { + + } + + @Test + public void testNotPushdown() { + + } + + @Test + public void testInPushdown() { + + } + + @Test + public void testComplexPushdown() { + + } + + + + + + + + @Test public void testSqlToKqlConverter() { @@ -103,7 +158,6 @@ public void testSqlToKqlConverter() andExpression.accept(new ClpFilterToKqlConverter( new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), functionAndTypeManager, - functionAndTypeManager, assignments), null); Optional definition = clpExpression.getDefinition(); diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java b/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java deleted file mode 100644 index 8ce964e32533a..0000000000000 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpRecordCursor.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.yscope.presto; - -import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.metadata.FunctionAndTypeManager; -import com.facebook.presto.spi.ColumnHandle; -import com.facebook.presto.spi.relation.CallExpression; -import com.facebook.presto.spi.relation.VariableReferenceExpression; -import com.facebook.presto.sql.relational.FunctionResolution; -import com.google.common.collect.ImmutableList; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -import static com.facebook.presto.common.function.OperatorType.EQUAL; -import static com.facebook.presto.common.type.BooleanType.BOOLEAN; -import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; -import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; -import static com.facebook.presto.sql.relational.Expressions.constant; -import static com.facebook.presto.testing.TestingConnectorSession.SESSION; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertNotNull; -import static org.testng.Assert.assertTrue; - -@Test(singleThreaded = true) -public class TestClpRecordCursor -{ - private ClpClient clpClient; - - @BeforeMethod - public void setUp() - { - ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive") - .setPolymorphicTypeEnabled(true) - .setClpExecutablePath("/usr/local/bin/clp-s-projection"); - clpClient = new ClpClient(config); - clpClient.start(); - } - - public void assertNull(ClpRecordCursor cursor, List indices) - { - for (int index : indices) { - assertTrue(cursor.isNull(index)); - } - } - - @Test - public void testTable1RecordCursor() - { - ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); - List 
archiveIds = clpClient.listArchiveIds("test_1_table"); - ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( - ClpTransactionHandle.INSTANCE, - SESSION, - new ClpSplit("default", "test_1_table", archiveIds.get(0), Optional.empty()), - new ArrayList<>(clpClient.listColumns("test_1_table"))); - assertNotNull(recordSet, "recordSet is null"); - ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); - assertNotNull(cursor, "cursor is null"); - assertTrue(cursor.advanceNextPosition()); - assertEquals(cursor.getLong(0), 1); - assertEquals(cursor.getDouble(2), 2.0); - assertTrue(cursor.getBoolean(5)); - assertEquals(cursor.getSlice(6).toStringUtf8(), "Hello world"); - assertNull(cursor, List.of(1, 3, 4)); - assertTrue(cursor.advanceNextPosition()); - assertEquals(cursor.getLong(0), 2); - assertEquals(cursor.getDouble(2), 3.0); - assertFalse(cursor.getBoolean(5)); - assertEquals(cursor.getSlice(6).toStringUtf8(), "Goodbye world"); - assertNull(cursor, List.of(1, 3, 4)); - assertTrue(cursor.advanceNextPosition()); - assertEquals(cursor.getSlice(1).toStringUtf8(), "foo"); - assertEquals(cursor.getSlice(3).toStringUtf8(), "bar"); - assertEquals(cursor.getDouble(4), 2.0); - assertNull(cursor, List.of(0, 2, 5, 6)); - assertTrue(cursor.advanceNextPosition()); - assertEquals(cursor.getSlice(1).toStringUtf8(), "baz"); - assertEquals(cursor.getSlice(3).toStringUtf8(), "qux"); - assertNull(cursor, List.of(0, 2, 4, 5, 6)); - assertFalse(cursor.advanceNextPosition()); - } - - @Test - public void testTable2RecordCursor() - { - ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); - List archiveIds = clpClient.listArchiveIds("test_2_table"); - ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( - ClpTransactionHandle.INSTANCE, - SESSION, - new ClpSplit("default", "test_2_table", archiveIds.get(0), Optional.empty()), - new ArrayList<>(clpClient.listColumns("test_2_table"))); - assertNotNull(recordSet, "recordSet is 
null"); - ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); - assertNotNull(cursor, "cursor is null"); - for (int i = 0; i <= 12; i++) { - assertTrue(cursor.advanceNextPosition()); - assertEquals(cursor.getLong(3), i); - } - assertFalse(cursor.advanceNextPosition()); - } - - @Test - public void testPredicate() - { - FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager(); - CallExpression callExpression = - new CallExpression(EQUAL.name(), - functionAndTypeManager.resolveOperator(EQUAL, fromTypes( - BigintType.BIGINT, BigintType.BIGINT)), - BOOLEAN, - ImmutableList.of(new VariableReferenceExpression(Optional.empty(), - "a_bigint", - BigintType.BIGINT), - constant(1L, BigintType.BIGINT))); - Map assignments = Map.of( - new VariableReferenceExpression(Optional.empty(), "a_bigint", BigintType.BIGINT), - new ClpColumnHandle("a_bigint", "a", BigintType.BIGINT, false)); - Optional query = - callExpression.accept(new ClpFilterToKqlConverter( - new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), - functionAndTypeManager, functionAndTypeManager, assignments), null).getDefinition(); - assertTrue(query.isPresent()); - ClpRecordSetProvider recordSetProvider = new ClpRecordSetProvider(clpClient); - List archiveIds = clpClient.listArchiveIds("test_1_table"); - ClpRecordSet recordSet = (ClpRecordSet) recordSetProvider.getRecordSet( - ClpTransactionHandle.INSTANCE, - SESSION, - new ClpSplit("default", "test_1_table", archiveIds.get(0), query), - new ArrayList<>(clpClient.listColumns("test_1_table"))); - assertNotNull(recordSet, "recordSet is null"); - ClpRecordCursor cursor = (ClpRecordCursor) recordSet.cursor(); - assertNotNull(cursor, "cursor is null"); - assertTrue(cursor.advanceNextPosition()); - assertEquals(cursor.getLong(0), 1); - assertEquals(cursor.getDouble(2), 2.0); - assertTrue(cursor.getBoolean(5)); - assertEquals(cursor.getSlice(6).toStringUtf8(), "Hello world"); - assertNull(cursor, List.of(1, 3, 4)); 
- assertFalse(cursor.advanceNextPosition()); - } - - @AfterMethod - public void tearDown() - { - clpClient.close(); - } -} From 34bfc3b7927a033428ffa6f73b04cd0ce26d6313 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Fri, 28 Feb 2025 02:33:05 +0000 Subject: [PATCH 078/126] apply prestissimo changes --- .../presto_cpp/main/types/PrestoToVeloxConnector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index 811ef98e5549c..35116ff9b972d 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -1599,7 +1599,7 @@ ClpPrestoToVeloxConnector::toVeloxTableHandle( "Unexpected layout type {}", tableHandle.connectorTableLayout->_type); return std::make_unique( - tableHandle.connectorId, clpLayout->table.tableName, clpLayout->query); + tableHandle.connectorId, clpLayout->table.schemaTableName.table, clpLayout->query); } std::unique_ptr From d431a7abd177047379e26f4ba977933838542fa8 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Tue, 4 Mar 2025 15:12:07 +0000 Subject: [PATCH 079/126] add test cases for PlanOptimizer add one more transformation rule in ClpPlanOptimizer add metadata provider template --- .../java/com/yscope/presto/ClpClient.java | 5 +- .../presto/ClpFilterToKqlConverter.java | 6 +- .../presto/metadata/ClpMetadataProvider.java | 11 + .../metadata/ClpMySQLMetadataProvider.java | 5 + .../ClpSchemaNode.java} | 6 +- .../com/yscope/presto/schema/SchemaTree.java | 68 ------ .../yscope/presto/TestClpPlanOptimizer.java | 203 +++++++----------- .../com/yscope/presto/TestClpQueryBase.java | 96 +++++++++ 8 files changed, 194 insertions(+), 206 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java create mode 100644 
presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java rename presto-clp/src/main/java/com/yscope/presto/{schema/SchemaNode.java => metadata/ClpSchemaNode.java} (96%) delete mode 100644 presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java create mode 100644 presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 40c0a4e7165f9..24c8a53e9951b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -26,7 +26,8 @@ import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import com.yscope.presto.schema.SchemaNode; +import com.yscope.presto.metadata.ClpSchemaNode; + import javax.inject.Inject; import java.nio.file.DirectoryStream; @@ -105,7 +106,7 @@ public Set loadTableSchema(SchemaTableName schemaTableName) while (resultSet.next()) { String columnName = resultSet.getString("name"); - SchemaNode.NodeType columnType = SchemaNode.NodeType.fromType(resultSet.getByte("type")); + ClpSchemaNode.NodeType columnType = ClpSchemaNode.NodeType.fromType(resultSet.getByte("type")); Type prestoType = null; switch (columnType) { case Integer: diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index e76f22c5efe47..d85705e105cb9 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -41,6 +41,8 @@ import static com.yscope.presto.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static java.util.Objects.requireNonNull; +// TODO: Correctly handle escaping of special characters in LIKE expressions +// TODO: Consider 
whether it handles is NULL and is NOT NULL expressions correctly public class ClpFilterToKqlConverter implements RowExpressionVisitor { @@ -189,7 +191,7 @@ private ClpExpression handleLike(CallExpression node) if (argument instanceof ConstantExpression) { ConstantExpression literal = (ConstantExpression) argument; String literalString = getLiteralString(literal); - return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*") + "\""); + return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*").replace("_", "?") + "\""); } else if (argument instanceof CallExpression) { CallExpression callExpression = (CallExpression) argument; @@ -206,7 +208,7 @@ else if (argument instanceof CallExpression) { } ConstantExpression literal = (ConstantExpression) callExpression.getArguments().get(0); String literalString = getLiteralString(literal); - return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*") + "\""); + return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*").replace("_", "?") + "\""); } return new ClpExpression(node); } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java new file mode 100644 index 0000000000000..710e643b341a2 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java @@ -0,0 +1,11 @@ +package com.yscope.presto.metadata; + +import com.facebook.presto.spi.SchemaTableName; +import com.yscope.presto.ClpColumnHandle; + +import java.util.Set; + +public interface ClpMetadataProvider { + public Set loadTableSchema(SchemaTableName schemaTableName); + +} diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java new file mode 100644 index 0000000000000..7bc153081f28d --- /dev/null +++ 
b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -0,0 +1,5 @@ +package com.yscope.presto.metadata; + +public class ClpMySQLMetadataProvider implements ClpMetadataProvider{ + +} diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java similarity index 96% rename from presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java rename to presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java index 18c010d784196..8e1678234970b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaNode.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto.schema; +package com.yscope.presto.metadata; import java.util.ArrayList; import java.util.Objects; -public class SchemaNode +public class ClpSchemaNode { private final int id; private final int parentId; @@ -24,7 +24,7 @@ public class SchemaNode private final String name; private final NodeType type; - public SchemaNode(int id, int parentId, String name, NodeType type) + public ClpSchemaNode(int id, int parentId, String name, NodeType type) { this.id = id; this.parentId = parentId; diff --git a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java deleted file mode 100644 index 9791804949124..0000000000000 --- a/presto-clp/src/main/java/com/yscope/presto/schema/SchemaTree.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto.schema; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -public class SchemaTree -{ - private final ArrayList schemaNodes; - private final Map nodeMap; - private final ArrayList primitiveTypeFields; - - public SchemaTree() - { - schemaNodes = new ArrayList<>(); - primitiveTypeFields = new ArrayList<>(); - nodeMap = new HashMap<>(); - } - - public ArrayList getPrimitiveFields() - { - return primitiveTypeFields; - } - - public int addNode(int parentId, String name, SchemaNode.NodeType type) - { - SchemaNode.NodeTuple tuple = new SchemaNode.NodeTuple(parentId, name, type); - if (nodeMap.containsKey(tuple)) { - return nodeMap.get(tuple); - } - - int id = schemaNodes.size(); - schemaNodes.add(new SchemaNode(id, parentId, name, type)); - nodeMap.put(tuple, id); - - if (parentId >= 0) { - schemaNodes.get(parentId).addChild(id); - } - - if (type != SchemaNode.NodeType.Object) { - primitiveTypeFields.add(new SchemaNode.NodeTuple(getKeyName(parentId, name), type)); - } - return id; - } - - private String getKeyName(int id, String key) - { - SchemaNode node = schemaNodes.get(id); - if (node.getParentId() < 0) { - return key; - } - - return getKeyName(node.getParentId(), node.getName() + "." 
+ key); - } -} diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index fce3dd2883f8a..b001aeb53e6ba 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -13,170 +13,111 @@ */ package com.yscope.presto; -import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.metadata.FunctionAndTypeManager; -import com.facebook.presto.spi.ColumnHandle; -import com.facebook.presto.spi.ConnectorId; -import com.facebook.presto.spi.relation.CallExpression; import com.facebook.presto.spi.relation.RowExpression; -import com.facebook.presto.spi.relation.SpecialFormExpression; -import com.facebook.presto.spi.relation.VariableReferenceExpression; -import com.facebook.presto.sql.analyzer.FunctionAndTypeResolver; -import com.facebook.presto.sql.relational.FunctionResolution; -import com.google.common.collect.ImmutableList; -import io.airlift.slice.Slices; import org.testng.annotations.Test; -import java.util.Map; import java.util.Optional; -import static com.facebook.presto.common.function.OperatorType.EQUAL; -import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; -import static com.facebook.presto.common.type.BigintType.BIGINT; -import static com.facebook.presto.common.type.BooleanType.BOOLEAN; -import static com.facebook.presto.common.type.DoubleType.DOUBLE; -import static com.facebook.presto.common.type.TimestampType.TIMESTAMP; -import static com.facebook.presto.common.type.VarcharType.VARCHAR; -import static com.facebook.presto.metadata.CastType.CAST; -import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; -import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; -import static com.facebook.presto.sql.relational.Expressions.call; -import static 
com.facebook.presto.sql.relational.Expressions.constant; -import static com.facebook.presto.type.LikePatternType.LIKE_PATTERN; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -@Test(singleThreaded = true) -public class TestClpPlanOptimizer -{ - private static ConnectorId druidConnectorId = new ConnectorId("id"); - private static ClpTableHandle realtimeOnlyTable = new ClpTableHandle("schema", "realtimeOnly", Optional.empty()); - private static ClpTableHandle hybridTable = new ClpTableHandle("schema", "hybrid", Optional.empty()); - private static ClpColumnHandle regionId = new ClpColumnHandle("region.Id", BIGINT, REGULAR); - private static ClpColumnHandle city = new ClpColumnHandle("city", VARCHAR, REGULAR); - private static final ClpColumnHandle fare = new ClpColumnHandle("fare", DOUBLE, REGULAR); - private static final ClpColumnHandle secondsSinceEpoch = new ClpColumnHandle("secondsSinceEpoch", BIGINT, REGULAR); - private static final ClpColumnHandle datetime = new ClpColumnHandle("datetime", TIMESTAMP, REGULAR); - +import static org.testng.Assert.*; + +@Test() +public class TestClpPlanOptimizer extends TestClpQueryBase { + private void testFilter(String sqlExpression, Optional expectedKqlExpression, + Optional expectedRemainingExpression, SessionHolder sessionHolder) { + RowExpression pushDownExpression = getRowExpression(sqlExpression, sessionHolder); + ClpExpression clpExpression = pushDownExpression.accept(new ClpFilterToKqlConverter( + standardFunctionResolution, + functionAndTypeManager, + variableToColumnHandleMap), + null); + Optional kqlExpression = clpExpression.getDefinition(); + Optional remainingExpression = clpExpression.getRemainingExpression(); + if (expectedKqlExpression.isPresent()) { + assertTrue(kqlExpression.isPresent()); + assertEquals(kqlExpression.get(), expectedKqlExpression.get()); + } else { + assertFalse(kqlExpression.isPresent()); + } + + if (expectedRemainingExpression.isPresent()) { + 
assertTrue(remainingExpression.isPresent()); + assertEquals(remainingExpression.get(), getRowExpression(expectedRemainingExpression.get(), sessionHolder)); + } else { + assertFalse(remainingExpression.isPresent()); + } + } + @Test public void testStringMatchPushdown() { - + SessionHolder sessionHolder = new SessionHolder(); + + testFilter("city = 'hello world'", Optional.of("city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("city != 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("city like 'hello%'", Optional.of("city: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("city not like 'hello%'", Optional.of("NOT city: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("city like 'hello_'", Optional.of("city: \"hello?\""), Optional.empty(), sessionHolder); + testFilter("city not like 'hello_'", Optional.of("NOT city: \"hello?\""), Optional.empty(), sessionHolder); + testFilter("city like 'hello_w%'", Optional.of("city: \"hello?w*\""), Optional.empty(), sessionHolder); + testFilter("city not like 'hello_w%'", Optional.of("NOT city: \"hello?w*\""), Optional.empty(), sessionHolder); } @Test public void testNumericComparisonPushdown() { - + SessionHolder sessionHolder = new SessionHolder(); + + testFilter("fare > 0", Optional.of("fare > 0"), Optional.empty(), sessionHolder); + testFilter("fare >= 0", Optional.of("fare >= 0"), Optional.empty(), sessionHolder); + testFilter("fare < 0", Optional.of("fare < 0"), Optional.empty(), sessionHolder); + testFilter("fare <= 0", Optional.of("fare <= 0"), Optional.empty(), sessionHolder); + testFilter("fare = 0", Optional.of("fare: 0"), Optional.empty(), sessionHolder); + testFilter("fare != 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); } @Test public void testOrPushdown() { + SessionHolder sessionHolder = new SessionHolder(); + testFilter("fare > 0 OR city like 'b%'", Optional.of("(fare > 0 OR city: \"b*\")"), 
Optional.empty(), + sessionHolder); + testFilter("\"lower(region.Name)\" = 'hello world' OR region.Id != 1", Optional.empty(), Optional.of("(lower(\"region.Name\") = 'hello world' OR NOT region.Id: 1)"), + sessionHolder); } @Test public void testAndPushdown() { + SessionHolder sessionHolder = new SessionHolder(); + testFilter("fare > 0 AND city like 'b%'", Optional.of("(fare > 0 AND city: \"b*\""), Optional.empty(), sessionHolder); + testFilter("lower(\"region.Name\") = 'hello world' AND region.Id != 1", Optional.of("NOT region.Id: 1"), Optional.of("lower(\"region.Name\") = 'hello world'"), + sessionHolder); } @Test public void testNotPushdown() { - + SessionHolder sessionHolder = new SessionHolder(); + + testFilter("region.Name NOT LIKE 'hello%'", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("NOT (region.Name LIKE 'hello%')", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("city != 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("city <> 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("NOT (city = 'hello world')", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("fare != 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); + testFilter("fare <> 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); + testFilter("NOT (fare = 0)", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); } @Test public void testInPushdown() { + SessionHolder sessionHolder = new SessionHolder(); + testFilter("city IN ('hello world', 'hello world 2')", Optional.of("city: (\"hello world\" OR \"hello world 2\")"), Optional.empty(), sessionHolder); } @Test public void testComplexPushdown() { - - } - - - - - - - - - @Test - public void testSqlToKqlConverter() - { - FunctionAndTypeManager functionAndTypeManager = 
createTestFunctionAndTypeManager(); - FunctionResolution functionResolution = - new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); - FunctionAndTypeResolver functionAndTypeResolver = functionAndTypeManager.getFunctionAndTypeResolver(); - // (a > 0 OR b like 'b%') AND (lower(c.e) = 'hello world' OR c IS NULL) - SpecialFormExpression firstOrExpression = - new SpecialFormExpression(SpecialFormExpression.Form.OR, - BOOLEAN, - new CallExpression(GREATER_THAN.name(), - functionAndTypeManager.resolveOperator(GREATER_THAN, fromTypes( - BigintType.BIGINT, BigintType.BIGINT)), - BOOLEAN, - ImmutableList.of(new VariableReferenceExpression(Optional.empty(), - "a_bigint", - BigintType.BIGINT), - constant(0L, BigintType.BIGINT))), - call("LIKE", - functionResolution.likeVarcharFunction(), - BOOLEAN, - new VariableReferenceExpression(Optional.empty(), "b_varchar", - VARCHAR), - call(CAST.name(), - functionAndTypeResolver.lookupCast("CAST", VARCHAR, LIKE_PATTERN), - LIKE_PATTERN, - constant(Slices.utf8Slice("b%"), VARCHAR)))); - SpecialFormExpression secondOrExpression = - new SpecialFormExpression(SpecialFormExpression.Form.OR, - BOOLEAN, - call(EQUAL.name(), functionResolution.comparisonFunction(EQUAL, VARCHAR, VARCHAR), BOOLEAN, - call("lower", - functionAndTypeResolver.lookupFunction("lower", fromTypes(VARCHAR)), - VARCHAR, - new VariableReferenceExpression(Optional.empty(), "c.e", - VARCHAR)), - constant(Slices.utf8Slice("hello world"), VARCHAR)), - new SpecialFormExpression(SpecialFormExpression.Form.IS_NULL, - BOOLEAN, - new VariableReferenceExpression(Optional.empty(), "c", VARCHAR))); - SpecialFormExpression andExpression = new SpecialFormExpression(SpecialFormExpression.Form.AND, - BOOLEAN, - firstOrExpression, - secondOrExpression); - Map assignments = Map.of( - new VariableReferenceExpression(Optional.empty(), "a_bigint", BigintType.BIGINT), - new ClpColumnHandle("a_bigint", BigintType.BIGINT, false), - new 
VariableReferenceExpression(Optional.empty(), "b_varchar", VARCHAR), - new ClpColumnHandle("b_varchar", VARCHAR, false), - new VariableReferenceExpression(Optional.empty(), "c.e", VARCHAR), - new ClpColumnHandle("c.e", VARCHAR, false), - new VariableReferenceExpression(Optional.empty(), "c", VARCHAR), - new ClpColumnHandle("c", VARCHAR, false)); - ClpExpression clpExpression = - andExpression.accept(new ClpFilterToKqlConverter( - new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()), - functionAndTypeManager, - assignments), - null); - Optional definition = clpExpression.getDefinition(); - Optional remainingExpression = clpExpression.getRemainingExpression(); - assertTrue(definition.isPresent()); - assertTrue(remainingExpression.isPresent()); - assertEquals(definition.get(), "((a > 0 OR b: \"b*\"))"); - assertEquals(remainingExpression.get(), new SpecialFormExpression(SpecialFormExpression.Form.OR, - BOOLEAN, - call(EQUAL.name(), functionResolution.comparisonFunction(EQUAL, VARCHAR, VARCHAR), BOOLEAN, - call("lower", - - functionAndTypeResolver.lookupFunction("lower", fromTypes(VARCHAR)), - VARCHAR, - new VariableReferenceExpression(Optional.empty(), "c.e", - VARCHAR)), - constant(Slices.utf8Slice("hello world"), VARCHAR)), - new SpecialFormExpression(SpecialFormExpression.Form.IS_NULL, - BOOLEAN, - new VariableReferenceExpression(Optional.empty(), "c", VARCHAR)))); + testFilter("(fare > 0 OR city like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", + Optional.of("((fare > 0 OR city: \"b*\"))"), + Optional.of("(lower(\"region.Name\") = 'hello world' OR city IS NULL)"), + new SessionHolder()); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java new file mode 100644 index 0000000000000..c30f02c239b6c --- /dev/null +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java @@ -0,0 +1,96 @@ +package com.yscope.presto; + 
+import com.facebook.presto.Session; +import com.facebook.presto.SystemSessionProperties; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.metadata.FunctionAndTypeManager; +import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.metadata.SessionPropertyManager; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.WarningCollector; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.ExpressionUtils; +import com.facebook.presto.sql.parser.ParsingOptions; +import com.facebook.presto.sql.parser.SqlParser; +import com.facebook.presto.sql.planner.TypeProvider; +import com.facebook.presto.sql.relational.FunctionResolution; +import com.facebook.presto.sql.relational.SqlToRowExpressionTranslator; +import com.facebook.presto.sql.tree.Expression; +import com.facebook.presto.sql.tree.NodeRef; +import com.facebook.presto.testing.TestingSession; +import com.google.common.collect.ImmutableMap; + +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Stream; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; +import static com.facebook.presto.sql.analyzer.ExpressionAnalyzer.getExpressionTypes; +import static com.facebook.presto.testing.TestingConnectorSession.SESSION; +import static java.util.stream.Collectors.toMap; + +public class TestClpQueryBase { + protected static final 
FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager(); + protected static final StandardFunctionResolution standardFunctionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); + protected static final Metadata metadata = MetadataManager.createTestMetadataManager(); + + protected static ClpColumnHandle regionId = new ClpColumnHandle("region.Id", BIGINT, true); + protected static ClpColumnHandle regionName = new ClpColumnHandle("region.Name", VARCHAR, true); + protected static ClpColumnHandle city = new ClpColumnHandle("city", VARCHAR, true); + protected static final ClpColumnHandle fare = new ClpColumnHandle("fare", DOUBLE, true); + protected static final ClpColumnHandle isHoliday = new ClpColumnHandle("isHoliday", BOOLEAN, true); + protected static final Map variableToColumnHandleMap = + Stream.of(regionId, regionName, city, fare, isHoliday) + .collect(toMap( + ch -> new VariableReferenceExpression(Optional.empty(), ch.getColumnName(), ch.getColumnType()), + ch -> ch + )); + protected final TypeProvider typeProvider = TypeProvider.fromVariables(variableToColumnHandleMap.keySet()); + + protected static class SessionHolder { + private final ConnectorSession connectorSession; + private final Session session; + + public SessionHolder() { + connectorSession = SESSION; + session = TestingSession.testSessionBuilder(new SessionPropertyManager(new SystemSessionProperties().getSessionProperties())).build(); + } + + public ConnectorSession getConnectorSession() { + return connectorSession; + } + + public Session getSession() { + return session; + } + } + + public static Expression expression(String sql) { + return ExpressionUtils.rewriteIdentifiersToSymbolReferences(new SqlParser().createExpression(sql, new ParsingOptions(ParsingOptions.DecimalLiteralTreatment.AS_DECIMAL))); + } + + protected RowExpression toRowExpression(Expression expression, Session session) { + Map, Type> expressionTypes = 
getExpressionTypes( + session, + metadata, + new SqlParser(), + typeProvider, + expression, + ImmutableMap.of(), + WarningCollector.NOOP); + return SqlToRowExpressionTranslator.translate(expression, expressionTypes, ImmutableMap.of(), functionAndTypeManager, session); + } + + protected RowExpression getRowExpression(String sqlExpression, SessionHolder sessionHolder) { + return toRowExpression(expression(sqlExpression), sessionHolder.getSession()); + } +} From bcf409676ad64c8bc4f6a0bb1983d515a92c588b Mon Sep 17 00:00:00 2001 From: rwang22 Date: Wed, 5 Mar 2025 03:23:29 +0000 Subject: [PATCH 080/126] remove SchemaTree add ClpMetadataProvider impl add ClpSplitProvider impl modify clpClient accordingly --- .../java/com/yscope/presto/ClpClient.java | 228 ++++-------------- .../java/com/yscope/presto/ClpConfig.java | 62 +++-- .../java/com/yscope/presto/ClpErrorCode.java | 4 +- .../presto/ClpFilterToKqlConverter.java | 4 +- .../com/yscope/presto/ClpSplitManager.java | 24 +- .../presto/metadata/ClpMetadataProvider.java | 5 +- .../metadata/ClpMySQLMetadataProvider.java | 92 +++++++ .../yscope/presto/metadata/ClpNodeType.java | 50 ++++ .../yscope/presto/metadata/ClpSchemaNode.java | 156 ------------ .../presto/split/ClpMySQLSplitProvider.java | 72 ++++++ .../yscope/presto/split/ClpSplitProvider.java | 9 + .../yscope/presto/TestClpPlanOptimizer.java | 2 +- 12 files changed, 330 insertions(+), 378 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java delete mode 100644 presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java create mode 100644 presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 24c8a53e9951b..2afd6685b717f 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -14,68 +14,55 @@ package com.yscope.presto; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.ArrayType; -import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.common.type.BooleanType; -import com.facebook.presto.common.type.DoubleType; -import com.facebook.presto.common.type.Type; -import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import com.yscope.presto.metadata.ClpSchemaNode; +import com.yscope.presto.metadata.ClpMetadataProvider; +import com.yscope.presto.metadata.ClpMySQLMetadataProvider; +import com.yscope.presto.split.ClpMySQLSplitProvider; +import com.yscope.presto.split.ClpSplitProvider; import javax.inject.Inject; - -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; public class ClpClient { - private static final String COLUMN_METADATA_PREFIX = "column_metadata_"; - private static final String ARCHIVE_TABLE_SUFFIX = "archives"; private static final Logger log = Logger.get(ClpClient.class); - private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM 
%s" + COLUMN_METADATA_PREFIX + "?"; - private static final String QUERY_SHOW_TABLES = "SHOW TABLES"; - private static final String QUERY_SELECT_ARCHIVE_IDS = "SELECT id FROM %s" + ARCHIVE_TABLE_SUFFIX; private final ClpConfig config; - private final String metadataDbUrl; - private final ClpConfig.InputSource inputSource; + private final ClpConfig.ArchiveSource archiveSource; private final LoadingCache> columnHandleCache; private final LoadingCache> tableNameCache; + private final ClpMetadataProvider clpMetadataProvider; + private final ClpSplitProvider clpSplitProvider; @Inject public ClpClient(ClpConfig config) { this.config = requireNonNull(config, "config is null"); - try { - Class.forName("com.mysql.jdbc.Driver"); + if (config.getMetadataSource() == ClpConfig.MetadataSource.MYSQL) { + clpMetadataProvider = new ClpMySQLMetadataProvider(config); + } + else { + log.error("Unsupported metadata source: %s", config.getMetadataSource()); + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE, "Unsupported metadata source: " + config.getMetadataSource()); } - catch (ClassNotFoundException e) { - log.error(e, "Failed to load MySQL JDBC driver"); + + if (config.getSplitSource() == ClpConfig.SplitSource.MYSQL) { + clpSplitProvider = new ClpMySQLSplitProvider(config); + } + else { + log.error("Unsupported split source: %s", config.getSplitSource()); + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, "Unsupported split source: " + config.getSplitSource()); + } - this.metadataDbUrl = "jdbc:mysql://" + config.getMetadataDbHost() + ":" + config.getMetadataDbPort() + "/" + config.getMetadataDbName(); - this.inputSource = config.getInputSource(); + + this.archiveSource = config.getInputSource(); this.columnHandleCache = CacheBuilder.newBuilder() .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) @@ -87,66 +74,9 @@ public ClpClient(ClpConfig config) 
.build(CacheLoader.from(this::loadTable)); } - public ClpConfig getConfig() - { - return config; - } - public Set loadTableSchema(SchemaTableName schemaTableName) { - String query = "SELECT * FROM " + config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX + schemaTableName.getTableName(); - - Connection connection = null; - LinkedHashSet columnHandles = new LinkedHashSet<>(); - try { - connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); - Statement statement = connection.createStatement(); - - ResultSet resultSet = statement.executeQuery(query); - - while (resultSet.next()) { - String columnName = resultSet.getString("name"); - ClpSchemaNode.NodeType columnType = ClpSchemaNode.NodeType.fromType(resultSet.getByte("type")); - Type prestoType = null; - switch (columnType) { - case Integer: - prestoType = BigintType.BIGINT; - break; - case Float: - prestoType = DoubleType.DOUBLE; - break; - case ClpString: - case VarString: - case DateString: - case NullValue: - prestoType = VarcharType.VARCHAR; - break; - case UnstructuredArray: - prestoType = new ArrayType(VarcharType.VARCHAR); - break; - case Boolean: - prestoType = BooleanType.BOOLEAN; - break; - default: - break; - } - columnHandles.add(new ClpColumnHandle(columnName, prestoType, true)); - } - } - catch (SQLException e) { - log.error(e, "Failed to connect to metadata database"); - return ImmutableSet.of(); - } - finally { - try { - if (connection != null) { - connection.close(); - } - } - catch (SQLException ex) { - log.warn(ex, "Failed to close metadata database connection"); - } - } + Set columnHandles = clpMetadataProvider.listTableSchema(schemaTableName); if (!config.isPolymorphicTypeEnabled()) { return columnHandles; } @@ -155,40 +85,7 @@ public Set loadTableSchema(SchemaTableName schemaTableName) public Set loadTable(String schemaName) { - ImmutableSet.Builder tableNames = ImmutableSet.builder(); - Connection connection = null; - try { - 
connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); - Statement statement = connection.createStatement(); - - String query = "SHOW TABLES"; - ResultSet resultSet = statement.executeQuery(query); - - // Processing the results - String databaseName = config.getMetadataDbName(); - String tableNamePrefix = config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX; - while (resultSet.next()) { - String tableName = resultSet.getString("Tables_in_" + databaseName); - if (tableName.startsWith(config.getMetadataTablePrefix()) && tableName.length() > tableNamePrefix.length()) { - tableNames.add(tableName.substring(tableNamePrefix.length())); - } - } - } - catch (SQLException e) { - log.error(e, "Failed to connect to metadata database"); - } - finally { - // Closing the connection - try { - if (connection != null) { - connection.close(); - } - } - catch (SQLException ex) { - log.warn(ex, "Failed to close metadata database connection"); - } - } - return tableNames.build(); + return clpMetadataProvider.listTables(schemaName); } public Set listTables(String schemaName) @@ -196,58 +93,29 @@ public Set listTables(String schemaName) return tableNameCache.getUnchecked(schemaName); } - public List listArchiveIds(String tableName) + public List listSplits(ClpTableLayoutHandle layoutHandle) { - if (inputSource == ClpConfig.InputSource.LOCAL) { - Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); - if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { - return ImmutableList.of(); - } + return clpSplitProvider.listSplits(layoutHandle); +// if (archiveSource == ClpConfig.ArchiveSource.LOCAL) { +// Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); +// if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { +// return ImmutableList.of(); +// } +// +// try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { +// ImmutableList.Builder archiveIds = ImmutableList.builder(); 
+// for (Path path : stream) { +// if (Files.isDirectory(path)) { +// archiveIds.add(path.getFileName().toString()); +// } +// } +// return archiveIds.build(); +// } +// catch (Exception e) { +// return ImmutableList.of(); +// } +// } - try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { - ImmutableList.Builder archiveIds = ImmutableList.builder(); - for (Path path : stream) { - if (Files.isDirectory(path)) { - archiveIds.add(path.getFileName().toString()); - } - } - return archiveIds.build(); - } - catch (Exception e) { - return ImmutableList.of(); - } - } - else { - Connection connection = null; - try { - connection = DriverManager.getConnection(metadataDbUrl, config.getMetadataDbUser(), config.getMetadataDbPassword()); - Statement statement = connection.createStatement(); - - String query = "SELECT id FROM " + config.getMetadataTablePrefix() + ARCHIVE_TABLE_SUFFIX; - ResultSet resultSet = statement.executeQuery(query); - - ImmutableList.Builder archiveIds = ImmutableList.builder(); - while (resultSet.next()) { - archiveIds.add(resultSet.getString("id")); - } - return archiveIds.build(); - } - catch (SQLException e) { - log.error(e, "Failed to connect to metadata database"); - return ImmutableList.of(); - } - finally { - // Closing the connection - try { - if (connection != null) { - connection.close(); - } - } - catch (SQLException ex) { - log.warn(ex, "Failed to close metadata database connection"); - } - } - } } public Set listColumns(SchemaTableName schemaTableName) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 55b5fd07816d8..09d3d8d1b44b6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -17,23 +17,37 @@ public class ClpConfig { - public enum InputSource + // TODO(Rui): We also need to change it in Velox and in the example configuration files + public enum 
ArchiveSource { LOCAL, S3 } + public enum MetadataSource + { + MYSQL + } + + // TODO(Rui): come up with a better name + public enum SplitSource + { + MYSQL + } + private boolean polymorphicTypeEnabled = true; - private String metadataDbHost; - private String metadataDbPort; + private MetadataSource metadataSource = MetadataSource.MYSQL; + // TODO(Rui): We need to change it in the example configuration files and in Velox + private String metadataDbUrl; private String metadataDbName; private String metadataDbUser; private String metadataDbPassword; private String metadataTablePrefix; private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; - private InputSource inputSource = InputSource.LOCAL; - private String clpExecutablePath; + private ArchiveSource archiveSource = ArchiveSource.LOCAL; + // TODO(Rui): We need to add it in the example configuration files and in Velox + private SplitSource splitSource = SplitSource.MYSQL; private String clpArchiveDir; private String s3Bucket; private String s3KeyPrefix; @@ -50,27 +64,27 @@ public ClpConfig setPolymorphicTypeEnabled(boolean polymorphicTypeEnabled) return this; } - public String getMetadataDbHost() + public MetadataSource getMetadataSource() { - return metadataDbHost; + return metadataSource; } - @Config("clp.metadata-db-host") - public ClpConfig setMetadataDbHost(String metadataDbHost) + @Config("clp.metadata-source") + public ClpConfig setMetadataSource(MetadataSource metadataSource) { - this.metadataDbHost = metadataDbHost; + this.metadataSource = metadataSource; return this; } - public String getMetadataDbPort() + public String getMetadataDbUrl() { - return metadataDbPort; + return metadataDbUrl; } - @Config("clp.metadata-db-port") - public ClpConfig setMetadataDbPort(String metadataDbPort) + @Config("clp.metadata-db-url") + public ClpConfig setMetadataDbUrl(String metadataDbUrl) { - this.metadataDbPort = metadataDbPort; + this.metadataDbUrl = metadataDbUrl; return this; } @@ -146,27 
+160,27 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) return this; } - public InputSource getInputSource() + public ArchiveSource getInputSource() { - return inputSource; + return archiveSource; } @Config("clp.input-source") - public ClpConfig setInputSource(InputSource inputSource) + public ClpConfig setInputSource(ArchiveSource archiveSource) { - this.inputSource = inputSource; + this.archiveSource = archiveSource; return this; } - public String getClpExecutablePath() + public SplitSource getSplitSource() { - return clpExecutablePath; + return splitSource; } - @Config("clp.executable-path") - public ClpConfig setClpExecutablePath(String clpExecutablePath) + @Config("clp.split-source") + public ClpConfig setSplitSource(SplitSource splitSource) { - this.clpExecutablePath = clpExecutablePath; + this.splitSource = splitSource; return this; } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java index 86c30792280d7..90907964173d8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java @@ -22,7 +22,9 @@ public enum ClpErrorCode implements ErrorCodeSupplier { - CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION(0, EXTERNAL); + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION(0, EXTERNAL), + CLP_UNSUPPORTED_METADATA_SOURCE(1, EXTERNAL), + CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index d85705e105cb9..7e4e4d62d2075 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -41,8 +41,8 @@ import static com.yscope.presto.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static 
java.util.Objects.requireNonNull; -// TODO: Correctly handle escaping of special characters in LIKE expressions -// TODO: Consider whether it handles is NULL and is NOT NULL expressions correctly +// TODO(Rui): Correctly handle escaping of special characters in LIKE expressions +// TODO(Rui): Consider whether it handles `is NULL` and `is NOT NULL` expressions correctly public class ClpFilterToKqlConverter implements RowExpressionVisitor { diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index 4d070be73b7f4..072ec7ae203a5 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -42,17 +42,17 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand SplitSchedulingContext splitSchedulingContext) { ClpTableLayoutHandle layoutHandle = (ClpTableLayoutHandle) layout; - ClpTableHandle tableHandle = layoutHandle.getTable(); - if (!clpClient.listTables().contains(tableHandle.getTableName())) { - throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); - } - - return new FixedSplitSource(clpClient.listArchiveIds(tableHandle.getTableName()) - .stream() - .map(archiveId -> new ClpSplit("default", - tableHandle.getTableName(), - archiveId, - layoutHandle.getQuery())) - .collect(Collectors.toList())); +// ClpTableHandle tableHandle = layoutHandle.getTable(); +// if (!clpClient.listTables(tableHandle.getSchemaTableName().getSchemaName()).contains(tableHandle.getTableName())) { +// throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); +// } + return new FixedSplitSource(clpClient.listSplits(layoutHandle)); +// return new FixedSplitSource(clpClient.listArchiveIds(tableHandle.getTableName()) +// .stream() +// .map(archiveId -> new ClpSplit("default", +// tableHandle.getTableName(), +// archiveId, +// 
layoutHandle.getQuery())) +// .collect(Collectors.toList())); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java index 710e643b341a2..a7ad735369cb7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java @@ -6,6 +6,7 @@ import java.util.Set; public interface ClpMetadataProvider { - public Set loadTableSchema(SchemaTableName schemaTableName); - + // TODO(Rui): Think about if it is necessary to return a set of ClpColumnHandle instead of a list of ClpColumnHandle + public Set listTableSchema(SchemaTableName schemaTableName); + public Set listTables(String schema); } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index 7bc153081f28d..61a05a0ada011 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -1,5 +1,97 @@ package com.yscope.presto.metadata; +import com.facebook.airlift.log.Logger; +import com.facebook.presto.common.type.*; +import com.facebook.presto.spi.SchemaTableName; +import com.yscope.presto.ClpColumnHandle; +import com.yscope.presto.ClpConfig; + +import java.sql.*; +import java.util.HashSet; +import java.util.Set; + public class ClpMySQLMetadataProvider implements ClpMetadataProvider{ + private static final Logger log = Logger.get(ClpMySQLMetadataProvider.class); + + private static final String COLUMN_METADATA_PREFIX = "column_metadata_"; + private static final String ARCHIVE_TABLE_SUFFIX = "archives"; + private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM %s" + COLUMN_METADATA_PREFIX + "?"; + private static final String QUERY_SHOW_TABLES = "SHOW 
TABLES"; + + private final ClpConfig config; + + public ClpMySQLMetadataProvider(ClpConfig config) { + try { + Class.forName("com.mysql.jdbc.Driver"); + } + catch (ClassNotFoundException e) { + log.error(e, "Failed to load MySQL JDBC driver"); + } + this.config = config; + } + + private Connection getConnection() throws SQLException { + return DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + } + + // TODO(Rui): Consider move it to a util class + private Type mapColumnType(byte type) { + switch (ClpNodeType.fromType(type)) { + case Integer: return BigintType.BIGINT; + case Float: return DoubleType.DOUBLE; + case ClpString: + case VarString: + case DateString: + case NullValue: return VarcharType.VARCHAR; + case UnstructuredArray: return new ArrayType(VarcharType.VARCHAR); + case Boolean: return BooleanType.BOOLEAN; + default: throw new IllegalArgumentException("Unknown column type: " + type); + } + } + + @Override + public Set listTableSchema(SchemaTableName schemaTableName) { + Set columnHandles = new HashSet<>(); + String query = String.format(QUERY_SELECT_COLUMNS, config.getMetadataTablePrefix()); + + try (Connection connection = getConnection(); + PreparedStatement statement = connection.prepareStatement(query)) { + statement.setString(1, schemaTableName.getTableName()); + + try (ResultSet resultSet = statement.executeQuery()) { + while (resultSet.next()) { + columnHandles.add(new ClpColumnHandle( + resultSet.getString("name"), + mapColumnType(resultSet.getByte("type")), + true + )); + } + } + } catch (SQLException e) { + log.error("Failed to load table schema for: " + schemaTableName.getTableName(), e); + } + return columnHandles; + } + + @Override + public Set listTables(String schema) { + Set tableNames = new HashSet<>(); + + try (Connection connection = getConnection(); + Statement statement = connection.createStatement(); + ResultSet resultSet = statement.executeQuery(QUERY_SHOW_TABLES)) { 
+ + while (resultSet.next()) { + String tableName = resultSet.getString("Tables_in_" + config.getMetadataDbName()); + if (tableName.startsWith(config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX)) { + tableNames.add(tableName.substring((config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX).length())); + } + } + } catch (SQLException e) { + log.error("Failed to load table names", e); + } + + return tableNames; + } } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java new file mode 100644 index 0000000000000..58f6d943dbf57 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java @@ -0,0 +1,50 @@ +package com.yscope.presto.metadata; + +public enum ClpNodeType +{ + Integer((byte) 0), + Float((byte) 1), + ClpString((byte) 2), + VarString((byte) 3), + Boolean((byte) 4), + Object((byte) 5), + UnstructuredArray((byte) 6), + NullValue((byte) 7), + DateString((byte) 8), + StructuredArray((byte) 9); + + private final byte type; + private static final ClpNodeType[] LOOKUP_TABLE; + + static { + byte maxType = 0; + for (ClpNodeType nodeType : values()) { + if (nodeType.type > maxType) { + maxType = nodeType.type; + } + } + + ClpNodeType[] lookup = new ClpNodeType[maxType + 1]; + for (ClpNodeType nodeType : values()) { + lookup[nodeType.type] = nodeType; + } + + LOOKUP_TABLE = lookup; + } + + ClpNodeType(byte type) { + this.type = type; + } + + public static ClpNodeType fromType(byte type) { + if (type < 0 || type >= LOOKUP_TABLE.length || LOOKUP_TABLE[type] == null) { + throw new IllegalArgumentException("Invalid type code: " + type); + } + return LOOKUP_TABLE[type]; + } + + public byte getType() + { + return type; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java deleted file mode 100644 index 8e1678234970b..0000000000000 --- 
a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaNode.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto.metadata; - -import java.util.ArrayList; -import java.util.Objects; - -public class ClpSchemaNode -{ - private final int id; - private final int parentId; - private final ArrayList childrenIds; - private final String name; - private final NodeType type; - - public ClpSchemaNode(int id, int parentId, String name, NodeType type) - { - this.id = id; - this.parentId = parentId; - this.name = name; - this.type = type; - this.childrenIds = new ArrayList<>(); - } - - public String getName() - { - return name; - } - - public NodeType getType() - { - return type; - } - - public int getId() - { - return id; - } - - public int getParentId() - { - return parentId; - } - - public void addChild(int id) - { - childrenIds.add(id); - } - - public ArrayList getChildrenIds() - { - return childrenIds; - } - - public enum NodeType - { - Integer((byte) 0), - Float((byte) 1), - ClpString((byte) 2), - VarString((byte) 3), - Boolean((byte) 4), - Object((byte) 5), - UnstructuredArray((byte) 6), - NullValue((byte) 7), - DateString((byte) 8), - StructuredArray((byte) 9); - - private final byte type; - - NodeType(byte type) - { - this.type = type; - } - - public static NodeType fromType(byte type) - { - for (NodeType status : NodeType.values()) { - if (status.getType() == type) { - return status; - } - } 
- throw new IllegalArgumentException("Invalid type code: " + type); - } - - public byte getType() - { - return type; - } - } - - public static class NodeTuple - { - private final int parentId; - private final String name; - private final NodeType type; - - public NodeTuple(int parentId, String name, NodeType type) - { - this.parentId = parentId; - this.name = name; - this.type = type; - } - - public NodeTuple(String name, NodeType type) - { - this(-1, name, type); - } - - public NodeType getType() - { - return type; - } - - public int getParentId() - { - return parentId; - } - - public String getName() - { - return name; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - NodeTuple tuple = (NodeTuple) o; - return Objects.equals(type, tuple.type) && - Objects.equals(parentId, tuple.parentId) && - Objects.equals(name, tuple.name); - } - - @Override - public int hashCode() - { - return Objects.hash(type, parentId, name); - } - } -} diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java new file mode 100644 index 0000000000000..bd43da4d340e8 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java @@ -0,0 +1,72 @@ +package com.yscope.presto.split; + +import com.facebook.airlift.log.Logger; +import com.facebook.presto.spi.SchemaTableName; +import com.yscope.presto.ClpConfig; +import com.yscope.presto.ClpSplit; +import com.yscope.presto.ClpTableLayoutHandle; +import com.yscope.presto.metadata.ClpMySQLMetadataProvider; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class ClpMySQLSplitProvider implements ClpSplitProvider { + private static final Logger log = Logger.get(ClpMySQLSplitProvider.class); + + private static final String ARCHIVE_TABLE_SUFFIX = "archives"; + private 
static final String TABLE_METADATA_TABLE_SUFFIX = "tables"; + private static final String QUERY_SELECT_ARCHIVE_IDS = "SELECT id FROM %s" + ARCHIVE_TABLE_SUFFIX; + private static final String QUERY_SELECT_TABLE_METADATA = "SELECT * FROM %s" + TABLE_METADATA_TABLE_SUFFIX + " WHERE AND table_name = ?"; + + private final ClpConfig config; + + public ClpMySQLSplitProvider(ClpConfig config) { + try { + Class.forName("com.mysql.jdbc.Driver"); + } + catch (ClassNotFoundException e) { + log.error(e, "Failed to load MySQL JDBC driver"); + } + this.config = config; + } + + private Connection getConnection() throws SQLException { + return DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + } + + @Override + // TODO(Rui): This method is not complete yet + public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) { +// List splits = new ArrayList<>(); +// String tableName = clpTableLayoutHandle.getTable().getSchemaTableName().getTableName(); +// String query = String.format(QUERY_SELECT_TABLE_METADATA, config.getMetadataTablePrefix()); +// try (Connection connection = getConnection(); +// PreparedStatement statement = connection.prepareStatement(query)) { +// statement.setString(1, schemaTableName.getTableName()); +// ResultSet resultSet = statement.executeQuery(); +// while (resultSet.next()) { +// String archiveId = resultSet.getString("archive_id"); +// } +// } +// catch (SQLException e) { +// log.error("Failed to retrieve table metadata", e); +// } +// +// List archiveIds = new ArrayList<>(); +// String query = String.format(QUERY_SELECT_ARCHIVE_IDS, config.getMetadataTablePrefix()); +// +// try (Connection connection = getConnection(); +// PreparedStatement statement = connection.prepareStatement(query); +// ResultSet resultSet = statement.executeQuery()) { +// +// while (resultSet.next()) { +// archiveIds.add(resultSet.getString("id")); +// } +// } catch (SQLException e) { +// log.error("Failed to 
retrieve archive IDs", e); +// } + + return null; + } +} diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java new file mode 100644 index 0000000000000..a9080ac1aceac --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java @@ -0,0 +1,9 @@ +package com.yscope.presto.split; + +import java.util.List; +import com.yscope.presto.ClpTableLayoutHandle; +import com.yscope.presto.ClpSplit; + +public interface ClpSplitProvider { + List listSplits(ClpTableLayoutHandle clpTableLayoutHandle); +} diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index b001aeb53e6ba..8123f42bb84d3 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -110,7 +110,7 @@ public void testNotPushdown() { public void testInPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("city IN ('hello world', 'hello world 2')", Optional.of("city: (\"hello world\" OR \"hello world 2\")"), Optional.empty(), sessionHolder); + testFilter("city IN ('hello world', 'hello world 2')", Optional.of("(city: \"hello world\" OR city: \"hello world 2\")"), Optional.empty(), sessionHolder); } @Test From 73e6a060a4a8f3247694710123451aed112005fa Mon Sep 17 00:00:00 2001 From: rwang22 Date: Thu, 6 Mar 2025 20:20:06 +0000 Subject: [PATCH 081/126] Finished ClP plan optimizer and metadata unit tests, fixed related bug and checkstyle issues Removed unnecessary resource files Modifed pom.xml --- presto-clp/pom.xml | 41 +++++-- .../java/com/yscope/presto/ClpClient.java | 14 ++- .../presto/ClpFilterToKqlConverter.java | 8 +- .../java/com/yscope/presto/ClpMetadata.java | 3 +- .../java/com/yscope/presto/ClpModule.java | 2 +- .../com/yscope/presto/ClpSplitManager.java | 4 +- 
.../presto/metadata/ClpMetadataProvider.java | 17 ++- .../metadata/ClpMySQLMetadataProvider.java | 95 ++++++++++----- .../yscope/presto/metadata/ClpNodeType.java | 19 ++- .../presto/split/ClpMySQLSplitProvider.java | 33 +++-- .../yscope/presto/split/ClpSplitProvider.java | 21 +++- .../com/yscope/presto/TestClpMetadata.java | 115 +++++++++++------- .../yscope/presto/TestClpPlanOptimizer.java | 51 +++++--- .../com/yscope/presto/TestClpQueryBase.java | 41 +++++-- .../array.dict | Bin 8 -> 0 bytes .../log.dict | Bin 53 -> 0 bytes .../schema_ids | Bin 48 -> 0 bytes .../schema_tree | Bin 65 -> 0 bytes .../table_metadata | Bin 51 -> 0 bytes .../tables | Bin 86 -> 0 bytes .../timestamp.dict | Bin 17 -> 0 bytes .../var.dict | Bin 43 -> 0 bytes .../array.dict | Bin 8 -> 0 bytes .../log.dict | Bin 8 -> 0 bytes .../schema_ids | Bin 40 -> 0 bytes .../schema_tree | Bin 76 -> 0 bytes .../table_metadata | Bin 29 -> 0 bytes .../tables | Bin 157 -> 0 bytes .../timestamp.dict | Bin 17 -> 0 bytes .../var.dict | Bin 250 -> 0 bytes presto-clp/src/test/resources/logs/test_1 | 4 - presto-clp/src/test/resources/logs/test_2 | 13 -- 32 files changed, 318 insertions(+), 163 deletions(-) delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/array.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/log.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_ids delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_tree delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/table_metadata delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/tables delete mode 100644 
presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/timestamp.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/var.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/array.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/log.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_ids delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_tree delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/table_metadata delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/tables delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/timestamp.dict delete mode 100644 presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/var.dict delete mode 100644 presto-clp/src/test/resources/logs/test_1 delete mode 100644 presto-clp/src/test/resources/logs/test_2 diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index e548a1429409a..3c392da17fec5 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -51,10 +51,10 @@ guice - - javax.annotation - javax.annotation-api - + + + + com.google.code.findbugs @@ -78,15 +78,15 @@ provided - - com.fasterxml.jackson.core - jackson-databind - + + + + - - com.github.luben - zstd-jni - + + + + com.facebook.presto @@ -130,5 +130,22 @@ test + + com.h2database + h2 + test + + + + com.facebook.presto + presto-parser + test + + + + org.apache.commons + commons-math3 + test + diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 2afd6685b717f..1dbf163edc17a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -25,7 +25,13 @@ import com.yscope.presto.split.ClpSplitProvider; import javax.inject.Inject; -import java.util.*; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; @@ -35,7 +41,6 @@ public class ClpClient private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; - private final ClpConfig.ArchiveSource archiveSource; private final LoadingCache> columnHandleCache; private final LoadingCache> tableNameCache; private final ClpMetadataProvider clpMetadataProvider; @@ -59,10 +64,8 @@ public ClpClient(ClpConfig config) else { log.error("Unsupported split source: %s", config.getSplitSource()); throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, "Unsupported split source: " + config.getSplitSource()); - } - this.archiveSource = config.getInputSource(); this.columnHandleCache = CacheBuilder.newBuilder() .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) @@ -115,7 +118,6 @@ public List listSplits(ClpTableLayoutHandle layoutHandle) // return ImmutableList.of(); // } // } - } public Set listColumns(SchemaTableName schemaTableName) @@ -126,7 +128,7 @@ public Set listColumns(SchemaTableName schemaTableName) private Set handlePolymorphicType(Set columnHandles) { Map> columnNameToColumnHandles = new HashMap<>(); - LinkedHashSet polymorphicColumnHandles = new LinkedHashSet<>(); + HashSet polymorphicColumnHandles = new HashSet<>(); for (ClpColumnHandle columnHandle : columnHandles) { 
columnNameToColumnHandles.computeIfAbsent(columnHandle.getColumnName(), k -> new ArrayList<>()) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 7e4e4d62d2075..dc081b7ad682e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -162,7 +162,7 @@ private ClpExpression handleIn(SpecialFormExpression node) ConstantExpression literal = (ConstantExpression) argument; String literalString = getLiteralString(literal); queryBuilder.append(variableName).append(": "); - if (literal.getType().equals(VarcharType.VARCHAR)) { + if (literal.getType() instanceof VarcharType) { queryBuilder.append("\""); queryBuilder.append(literalString); queryBuilder.append("\""); @@ -235,7 +235,7 @@ private ClpExpression handleLogicalBinary(String operator, CallExpression node) String literalString = rightExpression.getDefinition().get(); Type literalType = node.getArguments().get(1).getType(); if (operator.equals("=")) { - if (literalType.equals(VarcharType.VARCHAR)) { + if (literalType instanceof VarcharType) { return new ClpExpression(variableName + ": \"" + literalString + "\""); } else { @@ -243,14 +243,14 @@ private ClpExpression handleLogicalBinary(String operator, CallExpression node) } } else if (operator.equals("<>")) { - if (literalType.equals(VarcharType.VARCHAR)) { + if (literalType instanceof VarcharType) { return new ClpExpression("NOT " + variableName + ": \"" + literalString + "\""); } else { return new ClpExpression("NOT " + variableName + ": " + literalString); } } - else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !literalType.equals(VarcharType.VARCHAR)) { + else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { return new ClpExpression(variableName + " " + operator + " " + literalString); } else { 
diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 3cc37900f5f89..41928791aa786 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -134,8 +134,7 @@ public Map> listTableColumns(ConnectorSess return schemaTableNames.stream() .collect(ImmutableMap.toImmutableMap( Function.identity(), - tableName -> getTableMetadata(session, getTableHandle(session, tableName)).getColumns() - )); + tableName -> getTableMetadata(session, getTableHandle(session, tableName)).getColumns())); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java index 643009b9dbefb..acce28a23da0f 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java @@ -28,7 +28,7 @@ public void configure(Binder binder) binder.bind(ClpConnector.class).in(Scopes.SINGLETON); binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); - binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); +// binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); binder.bind(ClpClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index 072ec7ae203a5..0efc97d82addc 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -22,8 +22,6 @@ import javax.inject.Inject; -import java.util.stream.Collectors; - public class ClpSplitManager implements ConnectorSplitManager { @@ -46,7 +44,7 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand // if 
(!clpClient.listTables(tableHandle.getSchemaTableName().getSchemaName()).contains(tableHandle.getTableName())) { // throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); // } - return new FixedSplitSource(clpClient.listSplits(layoutHandle)); + return new FixedSplitSource(clpClient.listSplits(layoutHandle)); // return new FixedSplitSource(clpClient.listArchiveIds(tableHandle.getTableName()) // .stream() // .map(archiveId -> new ClpSplit("default", diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java index a7ad735369cb7..4f9568b2d0346 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java @@ -1,3 +1,16 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto.metadata; import com.facebook.presto.spi.SchemaTableName; @@ -5,8 +18,10 @@ import java.util.Set; -public interface ClpMetadataProvider { +public interface ClpMetadataProvider +{ // TODO(Rui): Think about if it is necessary to return a set of ClpColumnHandle instead of a list of ClpColumnHandle public Set listTableSchema(SchemaTableName schemaTableName); + public Set listTables(String schema); } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index 61a05a0ada011..b944975d8ff6e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -1,26 +1,52 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto.metadata; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.*; +import com.facebook.presto.common.type.ArrayType; +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.SchemaTableName; import com.yscope.presto.ClpColumnHandle; import com.yscope.presto.ClpConfig; -import java.sql.*; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; import java.util.HashSet; import java.util.Set; -public class ClpMySQLMetadataProvider implements ClpMetadataProvider{ +public class ClpMySQLMetadataProvider + implements ClpMetadataProvider +{ private static final Logger log = Logger.get(ClpMySQLMetadataProvider.class); - private static final String COLUMN_METADATA_PREFIX = "column_metadata_"; - private static final String ARCHIVE_TABLE_SUFFIX = "archives"; - private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM %s" + COLUMN_METADATA_PREFIX + "?"; + public static final String COLUMN_METADATA_PREFIX = "column_metadata_"; + private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM %s" + COLUMN_METADATA_PREFIX + "%s"; private static final String QUERY_SHOW_TABLES = "SHOW TABLES"; private final ClpConfig config; - public ClpMySQLMetadataProvider(ClpConfig config) { + public ClpMySQLMetadataProvider(ClpConfig config) + { try { Class.forName("com.mysql.jdbc.Driver"); } @@ -30,68 +56,77 @@ public ClpMySQLMetadataProvider(ClpConfig config) { this.config = config; } - private Connection getConnection() throws SQLException { + private Connection getConnection() throws SQLException + { return 
DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); } // TODO(Rui): Consider move it to a util class - private Type mapColumnType(byte type) { + private Type mapColumnType(byte type) + { switch (ClpNodeType.fromType(type)) { - case Integer: return BigintType.BIGINT; - case Float: return DoubleType.DOUBLE; + case Integer: + return BigintType.BIGINT; + case Float: + return DoubleType.DOUBLE; case ClpString: case VarString: case DateString: - case NullValue: return VarcharType.VARCHAR; - case UnstructuredArray: return new ArrayType(VarcharType.VARCHAR); - case Boolean: return BooleanType.BOOLEAN; - default: throw new IllegalArgumentException("Unknown column type: " + type); + case NullValue: + return VarcharType.VARCHAR; + case UnstructuredArray: + return new ArrayType(VarcharType.VARCHAR); + case Boolean: + return BooleanType.BOOLEAN; + default: + throw new IllegalArgumentException("Unknown column type: " + type); } } @Override - public Set listTableSchema(SchemaTableName schemaTableName) { + public Set listTableSchema(SchemaTableName schemaTableName) + { Set columnHandles = new HashSet<>(); - String query = String.format(QUERY_SELECT_COLUMNS, config.getMetadataTablePrefix()); + String query = String.format(QUERY_SELECT_COLUMNS, config.getMetadataTablePrefix(), schemaTableName.getTableName()); try (Connection connection = getConnection(); - PreparedStatement statement = connection.prepareStatement(query)) { - statement.setString(1, schemaTableName.getTableName()); - + PreparedStatement statement = connection.prepareStatement(query)) { try (ResultSet resultSet = statement.executeQuery()) { while (resultSet.next()) { columnHandles.add(new ClpColumnHandle( resultSet.getString("name"), mapColumnType(resultSet.getByte("type")), - true - )); + true)); } } - } catch (SQLException e) { + } + catch (SQLException e) { log.error("Failed to load table schema for: " + schemaTableName.getTableName(), e); } return 
columnHandles; } @Override - public Set listTables(String schema) { + public Set listTables(String schema) + { Set tableNames = new HashSet<>(); try (Connection connection = getConnection(); - Statement statement = connection.createStatement(); - ResultSet resultSet = statement.executeQuery(QUERY_SHOW_TABLES)) { - + Statement statement = connection.createStatement(); + ResultSet resultSet = statement.executeQuery(QUERY_SHOW_TABLES)) { + ResultSetMetaData metaData = resultSet.getMetaData(); + String tableColumnName = metaData.getColumnName(1); while (resultSet.next()) { - String tableName = resultSet.getString("Tables_in_" + config.getMetadataDbName()); + String tableName = resultSet.getString(tableColumnName); if (tableName.startsWith(config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX)) { tableNames.add(tableName.substring((config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX).length())); } } - } catch (SQLException e) { + } + catch (SQLException e) { log.error("Failed to load table names", e); } return tableNames; } - } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java index 58f6d943dbf57..32e95111375cd 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java @@ -1,3 +1,16 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto.metadata; public enum ClpNodeType @@ -32,11 +45,13 @@ public enum ClpNodeType LOOKUP_TABLE = lookup; } - ClpNodeType(byte type) { + ClpNodeType(byte type) + { this.type = type; } - public static ClpNodeType fromType(byte type) { + public static ClpNodeType fromType(byte type) + { if (type < 0 || type >= LOOKUP_TABLE.length || LOOKUP_TABLE[type] == null) { throw new IllegalArgumentException("Invalid type code: " + type); } diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java index bd43da4d340e8..498eac63161c2 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java @@ -1,17 +1,31 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto.split; import com.facebook.airlift.log.Logger; -import com.facebook.presto.spi.SchemaTableName; import com.yscope.presto.ClpConfig; import com.yscope.presto.ClpSplit; import com.yscope.presto.ClpTableLayoutHandle; -import com.yscope.presto.metadata.ClpMySQLMetadataProvider; -import java.sql.*; -import java.util.ArrayList; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; import java.util.List; -public class ClpMySQLSplitProvider implements ClpSplitProvider { +public class ClpMySQLSplitProvider + implements ClpSplitProvider +{ private static final Logger log = Logger.get(ClpMySQLSplitProvider.class); private static final String ARCHIVE_TABLE_SUFFIX = "archives"; @@ -21,7 +35,8 @@ public class ClpMySQLSplitProvider implements ClpSplitProvider { private final ClpConfig config; - public ClpMySQLSplitProvider(ClpConfig config) { + public ClpMySQLSplitProvider(ClpConfig config) + { try { Class.forName("com.mysql.jdbc.Driver"); } @@ -31,13 +46,15 @@ public ClpMySQLSplitProvider(ClpConfig config) { this.config = config; } - private Connection getConnection() throws SQLException { + private Connection getConnection() throws SQLException + { return DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); } @Override // TODO(Rui): This method is not complete yet - public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) { + public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) + { // List splits = new ArrayList<>(); // String tableName = clpTableLayoutHandle.getTable().getSchemaTableName().getTableName(); // String query = String.format(QUERY_SELECT_TABLE_METADATA, config.getMetadataTablePrefix()); diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java index a9080ac1aceac..394446136f2b7 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java @@ -1,9 +1,24 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.yscope.presto.split; -import java.util.List; -import com.yscope.presto.ClpTableLayoutHandle; import com.yscope.presto.ClpSplit; +import com.yscope.presto.ClpTableLayoutHandle; + +import java.util.List; -public interface ClpSplitProvider { +public interface ClpSplitProvider +{ List listSplits(ClpTableLayoutHandle clpTableLayoutHandle); } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index fa79d7ebcc733..860402e49c830 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -21,49 +21,113 @@ import com.facebook.presto.spi.ConnectorTableMetadata; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableList; +import com.yscope.presto.metadata.ClpNodeType; +import org.apache.commons.math3.util.Pair; +import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.io.File; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import 
java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Optional; import static com.facebook.presto.testing.TestingConnectorSession.SESSION; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; @Test(singleThreaded = true) public class TestClpMetadata { private ClpMetadata metadata; + private String metadataDbUrl; + + private static final String TABLE_NAME = "test"; + private static final String TABLE_SCHEMA = "default"; @BeforeMethod public void setUp() { - ClpConfig config = new ClpConfig().setClpArchiveDir("src/test/resources/clp_archive") - .setPolymorphicTypeEnabled(true) - .setClpExecutablePath("/usr/local/bin/clp-s-projection"); + metadataDbUrl = "jdbc:h2:file:/tmp/testdb;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; + String metadataDbTablePrefix = "clp_"; + String columnMetadataTablePrefix = "column_metadata_"; + ClpConfig config = new ClpConfig().setPolymorphicTypeEnabled(true) + .setMetadataDbUrl(metadataDbUrl) + .setMetadataDbUser("sa") + .setMetadataDbPassword("") + .setMetadataTablePrefix(metadataDbTablePrefix); metadata = new ClpMetadata(new ClpClient(config)); + + try (Connection conn = DriverManager.getConnection(metadataDbUrl, "sa", ""); + Statement stmt = conn.createStatement()) { + String createTable = "CREATE TABLE IF NOT EXISTS " + metadataDbTablePrefix + columnMetadataTablePrefix + + TABLE_NAME + " (name VARCHAR(512) NOT NULL, type TINYINT NOT NULL, PRIMARY KEY (name, type))"; + stmt.execute(createTable); + + List> records = Arrays.asList( + new Pair<>("a", ClpNodeType.Integer), + new Pair<>("a", ClpNodeType.VarString), + new Pair<>("b", ClpNodeType.Float), + new Pair<>("b", ClpNodeType.ClpString), + new Pair<>("c", ClpNodeType.Float), + new Pair<>("c.d", ClpNodeType.Boolean), + new Pair<>("c.e", ClpNodeType.VarString)); + + String insertSQL = "INSERT INTO " + metadataDbTablePrefix + columnMetadataTablePrefix + TABLE_NAME + + " (name, type) VALUES (?, ?)"; + try (PreparedStatement pstmt = 
conn.prepareStatement(insertSQL)) { + for (Pair record : records) { + pstmt.setString(1, record.getFirst()); + pstmt.setByte(2, record.getSecond().getType()); + pstmt.addBatch(); + } + pstmt.executeBatch(); + } + } + catch (SQLException e) { + fail(e.getMessage(), e); + } + } + + @AfterMethod + public void tearDown() + { + File dbFile = new File("/tmp/testdb.mv.db"); + File lockFile = new File("/tmp/testdb.trace.db"); // Optional, H2 sometimes creates this + if (dbFile.exists()) { + dbFile.delete(); + System.out.println("Deleted database file: " + dbFile.getAbsolutePath()); + } + if (lockFile.exists()) { + lockFile.delete(); + } } @Test public void testListSchemaNames() { - assertEquals(metadata.listSchemaNames(SESSION), ImmutableList.of("default")); + assertEquals(metadata.listSchemaNames(SESSION), ImmutableList.of(TABLE_SCHEMA)); } @Test public void testListTables() { HashSet tables = new HashSet<>(); - tables.add(new SchemaTableName("default", "test_1_table")); - tables.add(new SchemaTableName("default", "test_2_table")); + tables.add(new SchemaTableName(TABLE_SCHEMA, TABLE_NAME)); assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.empty())), tables); } @Test - public void testGetTable1Metadata() + public void testGetTableMetadata() { ClpTableHandle clpTableHandle = - (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName("default", "test_1_table")); + (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(TABLE_SCHEMA, TABLE_NAME)); ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); HashSet columnMetadata = new HashSet<>(); columnMetadata.add(ColumnMetadata.builder() @@ -99,39 +163,4 @@ public void testGetTable1Metadata() columnMetadata.add(ColumnMetadata.builder().setName("c").setType(DoubleType.DOUBLE).setNullable(true).build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } - - @Test - public void testGetTable2Metadata() - { - ClpTableHandle
clpTableHandle = - (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName("default", "test_2_table")); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); - HashSet columnMetadata = new HashSet<>(); - columnMetadata.add(ColumnMetadata.builder() - .setName("id") - .setType(BigintType.BIGINT) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("name") - .setType(VarcharType.VARCHAR) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("age") - .setType(BigintType.BIGINT) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("city") - .setType(VarcharType.VARCHAR) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("state") - .setType(VarcharType.VARCHAR) - .setNullable(true) - .build()); - assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); - } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index 8123f42bb84d3..a7a3f2749041e 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -18,12 +18,17 @@ import java.util.Optional; -import static org.testng.Assert.*; - -@Test() -public class TestClpPlanOptimizer extends TestClpQueryBase { +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +@Test +public class TestClpPlanOptimizer + extends TestClpQueryBase +{ private void testFilter(String sqlExpression, Optional expectedKqlExpression, - Optional expectedRemainingExpression, SessionHolder sessionHolder) { + Optional expectedRemainingExpression, SessionHolder sessionHolder) + { RowExpression pushDownExpression = getRowExpression(sqlExpression, 
sessionHolder); ClpExpression clpExpression = pushDownExpression.accept(new ClpFilterToKqlConverter( standardFunctionResolution, @@ -35,20 +40,20 @@ private void testFilter(String sqlExpression, Optional expectedKqlExpres if (expectedKqlExpression.isPresent()) { assertTrue(kqlExpression.isPresent()); assertEquals(kqlExpression.get(), expectedKqlExpression.get()); - } else { - assertFalse(kqlExpression.isPresent()); } if (expectedRemainingExpression.isPresent()) { assertTrue(remainingExpression.isPresent()); assertEquals(remainingExpression.get(), getRowExpression(expectedRemainingExpression.get(), sessionHolder)); - } else { + } + else { assertFalse(remainingExpression.isPresent()); } } @Test - public void testStringMatchPushdown() { + public void testStringMatchPushdown() + { SessionHolder sessionHolder = new SessionHolder(); testFilter("city = 'hello world'", Optional.of("city: \"hello world\""), Optional.empty(), sessionHolder); @@ -62,7 +67,8 @@ public void testStringMatchPushdown() { } @Test - public void testNumericComparisonPushdown() { + public void testNumericComparisonPushdown() + { SessionHolder sessionHolder = new SessionHolder(); testFilter("fare > 0", Optional.of("fare > 0"), Optional.empty(), sessionHolder); @@ -74,30 +80,33 @@ public void testNumericComparisonPushdown() { } @Test - public void testOrPushdown() { + public void testOrPushdown() + { SessionHolder sessionHolder = new SessionHolder(); testFilter("fare > 0 OR city like 'b%'", Optional.of("(fare > 0 OR city: \"b*\")"), Optional.empty(), sessionHolder); - testFilter("\"lower(region.Name)\" = 'hello world' OR region.Id != 1", Optional.empty(), Optional.of("(lower(\"region.Name\") = 'hello world' OR NOT region.Id: 1)"), + testFilter("lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1", Optional.empty(), Optional.of("(lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1)"), sessionHolder); } @Test - public void testAndPushdown() { + public void testAndPushdown() + { 
SessionHolder sessionHolder = new SessionHolder(); - testFilter("fare > 0 AND city like 'b%'", Optional.of("(fare > 0 AND city: \"b*\""), Optional.empty(), sessionHolder); - testFilter("lower(\"region.Name\") = 'hello world' AND region.Id != 1", Optional.of("NOT region.Id: 1"), Optional.of("lower(\"region.Name\") = 'hello world'"), + testFilter("fare > 0 AND city like 'b%'", Optional.of("(fare > 0 AND city: \"b*\")"), Optional.empty(), sessionHolder); + testFilter("lower(\"region.Name\") = 'hello world' AND \"region.Id\" != 1", Optional.of("(NOT region.Id: 1)"), Optional.of("lower(\"region.Name\") = 'hello world'"), sessionHolder); } @Test - public void testNotPushdown() { + public void testNotPushdown() + { SessionHolder sessionHolder = new SessionHolder(); - testFilter("region.Name NOT LIKE 'hello%'", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); - testFilter("NOT (region.Name LIKE 'hello%')", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("\"region.Name\" NOT LIKE 'hello%'", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("NOT (\"region.Name\" LIKE 'hello%')", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); testFilter("city != 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); testFilter("city <> 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); testFilter("NOT (city = 'hello world')", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); @@ -107,14 +116,16 @@ public void testNotPushdown() { } @Test - public void testInPushdown() { + public void testInPushdown() + { SessionHolder sessionHolder = new SessionHolder(); testFilter("city IN ('hello world', 'hello world 2')", Optional.of("(city: \"hello world\" OR city: \"hello world 2\")"), Optional.empty(), sessionHolder); } @Test - public void testComplexPushdown() 
{ + public void testComplexPushdown() + { testFilter("(fare > 0 OR city like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", Optional.of("((fare > 0 OR city: \"b*\"))"), Optional.of("(lower(\"region.Name\") = 'hello world' OR city IS NULL)"), diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java index c30f02c239b6c..546f2459eacef 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java @@ -1,3 +1,16 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto; import com.facebook.presto.Session; @@ -24,7 +37,6 @@ import com.facebook.presto.testing.TestingSession; import com.google.common.collect.ImmutableMap; -import java.util.Arrays; import java.util.Map; import java.util.Optional; import java.util.stream.Stream; @@ -38,7 +50,8 @@ import static com.facebook.presto.testing.TestingConnectorSession.SESSION; import static java.util.stream.Collectors.toMap; -public class TestClpQueryBase { +public class TestClpQueryBase +{ protected static final FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager(); protected static final StandardFunctionResolution standardFunctionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); protected static final Metadata metadata = MetadataManager.createTestMetadataManager(); @@ -52,33 +65,38 @@ public class TestClpQueryBase { Stream.of(regionId, regionName, city, fare, isHoliday) .collect(toMap( ch -> new VariableReferenceExpression(Optional.empty(), ch.getColumnName(), ch.getColumnType()), - ch -> ch - )); + ch -> ch)); protected final TypeProvider typeProvider = TypeProvider.fromVariables(variableToColumnHandleMap.keySet()); - protected static class SessionHolder { + protected static class SessionHolder + { private final ConnectorSession connectorSession; private final Session session; - public SessionHolder() { + public SessionHolder() + { connectorSession = SESSION; session = TestingSession.testSessionBuilder(new SessionPropertyManager(new SystemSessionProperties().getSessionProperties())).build(); } - public ConnectorSession getConnectorSession() { + public ConnectorSession getConnectorSession() + { return connectorSession; } - public Session getSession() { + public Session getSession() + { return session; } } - public static Expression expression(String sql) { + public static Expression expression(String sql) + { return ExpressionUtils.rewriteIdentifiersToSymbolReferences(new 
SqlParser().createExpression(sql, new ParsingOptions(ParsingOptions.DecimalLiteralTreatment.AS_DECIMAL))); } - protected RowExpression toRowExpression(Expression expression, Session session) { + protected RowExpression toRowExpression(Expression expression, Session session) + { Map, Type> expressionTypes = getExpressionTypes( session, metadata, @@ -90,7 +108,8 @@ protected RowExpression toRowExpression(Expression expression, Session session) return SqlToRowExpressionTranslator.translate(expression, expressionTypes, ImmutableMap.of(), functionAndTypeManager, session); } - protected RowExpression getRowExpression(String sqlExpression, SessionHolder sessionHolder) { + protected RowExpression getRowExpression(String sqlExpression, SessionHolder sessionHolder) + { return toRowExpression(expression(sqlExpression), sessionHolder.getSession()); } } diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/array.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/array.dict deleted file mode 100644 index 1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 KcmZQzfB*mh2mk>9 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/log.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/log.dict deleted file mode 100644 index 15e08683264d65e964e3d20d5f1eb7a987fd25c3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53 zcmZQ#fB=oH`hOWBR2dl_a5H$M=H%onl;;=ar0_Dh=jW#+Ri;9?ObiKs7!w!)P@E2r diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_ids b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_ids deleted file mode 100644 index 
c5ce9e254d27088e079d0e93d55e960b8568d413..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 48 zcmdPcs{fZE!j_R?1~UT-BLf2i6Du2#WM}38k{k@?oNqqaYH2GleyTmW>vRrtE&$Sx B3ef-n diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_tree b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/schema_tree deleted file mode 100644 index d96afdbabf05e4c644b4fc8cdc2d5618a23d5cdb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 65 zcmdPcs{fZE;wU4-4^D>vK)}Gt00fK-i3~}M$*jyNEU8QkiOfmKjNA+l1R9bPcn%mE Susx8N%)}wV!@Nn3K@0$g^$}(O diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/table_metadata b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/table_metadata deleted file mode 100644 index 13e31e9b13b64ba2221a02b87f12f53c83449e23..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 51 zcmdPcs{fZEB9xI~1v3K^gB1g#I)elQldS*)1aLAaoc1q1cpyQb<;GKPHXfBWMlk>d CN(%G< diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/tables b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/tables deleted file mode 100644 index cba9aaa49ba4c925555c5eaca6b03d41790583d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 86 zcmdPcs{fZE;wu9~10w?yg98JH10w?jurlaMGblYc^xy$w0;2|4SuO*E1_J{lg98%- Ol4v6XP$vX1Lumj-gAmvN diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/timestamp.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/timestamp.dict deleted file mode 100644 index 203cc1c9f498713682563f8263cf681a8bcb4e49..0000000000000000000000000000000000000000 GIT binary patch literal 0 
HcmV?d00001 literal 17 YcmdPcs{fZE!j*wRfPsOL!RP=J04P)gtN;K2 diff --git a/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/var.dict b/presto-clp/src/test/resources/clp_archive/test_1_table/13575e0c-4845-4659-9278-783256f9066e/var.dict deleted file mode 100644 index 2eefab01477dcdb29bc977313d7b9606c77a326a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 43 ucmZQ!fB=oH`hOWBt}-wbFf*j(=QA@TB^Ff`mR7JZG;mC}J#c~f022Vtj|;Z| diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/array.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/array.dict deleted file mode 100644 index 1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 KcmZQzfB*mh2mk>9 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/log.dict b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/log.dict deleted file mode 100644 index 1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 KcmZQzfB*mh2mk>9 diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_ids b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_ids deleted file mode 100644 index ab77a0ddfa1bf93214014ae46b344a023c0b4b6d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40 ocmdPcs{fZE;x7Zk4n_u6Mg|53CLm@8Viq7~1@m|7m^H8h0GS&F&;S4c diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_tree b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/schema_tree deleted file mode 100644 index 
3fa58b754f501b28944778ec8a7a0367f575e36e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 76 zcmdPcs{fZELY9f)3KJW{e;{CBWdH&uhRhTO7KXgU+*D=|9|#iDQ diff --git a/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/tables b/presto-clp/src/test/resources/clp_archive/test_2_table/a9a3ba79-44d6-4eef-995a-c4a0dfa64f66/tables deleted file mode 100644 index 77cfca5fed993e6fab8e42573775c100168de7bf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 157 zcmdPcs{fZEVkrxQ5UT*6!MzGz1=HWFc?0r=XEOb_cZt&yWp{e8u|$qfTDXnzfNqM) zVy`znIweByFRdtU)Zj=6K79Y3S@l}K#Z%8eW42XzP|h6rDRr7^d>VpU6-*9J#*JOLgAC_^5;KZHhBwL}u>N^& Aga7~l diff --git a/presto-clp/src/test/resources/logs/test_1 b/presto-clp/src/test/resources/logs/test_1 deleted file mode 100644 index 885f19b5dfb59..0000000000000 --- a/presto-clp/src/test/resources/logs/test_1 +++ /dev/null @@ -1,4 +0,0 @@ -{"a": 1, "b": 2.0, "c": {"d": true, "e": "Hello world"}} -{"a": 2, "b": 3.0, "c": {"d": false, "e": "Goodbye world"}} -{"a": "foo", "b": "bar", "c": 2.0} -{"a": "baz", "b": "qux"} \ No newline at end of file diff --git a/presto-clp/src/test/resources/logs/test_2 b/presto-clp/src/test/resources/logs/test_2 deleted file mode 100644 index a8c036b602f0e..0000000000000 --- a/presto-clp/src/test/resources/logs/test_2 +++ /dev/null @@ -1,13 +0,0 @@ -{"id": 0, "name": "John", "age": 32, "city": "Charlotte", "state": "NC"} -{"id": 1, "name": "Jane", "age": 25, "city": "Chicago", "state": "IL"} -{"id": 2, "name": "Doe", "age": 43, "city": "Nashville", "state": "TN"} -{"id": 3, "name": "Jack", "age": 29, "city": "Columbus", "state": "OH"} -{"id": 4, "name": "Jill", "age": 35, "city": "Seattle", "state": "WA"} -{"id": 5, "name": "Joe", "age": 38, "city": "Boston", "state": "MA"} -{"id": 6, "name": "Jenny", "age": 27, "city": "Miami", "state": "FL"} -{"id": 7, "name": "Jim", "age": 31, "city": "Denver", "state": "CO"} -{"id": 8, 
"name": "Judy", "age": 40, "city": "Houston", "state": "TX"} -{"id": 9, "name": "Jerry", "age": 33, "city": "Philadelphia", "state": "PA"} -{"id": 10, "name": "Jesse", "age": 30, "city": "Phoenix", "state": "AZ"} -{"id": 11, "name": "Jasmine", "age": 28, "city": "Austin", "state": "TX"} -{"id": 12, "name": "Jared", "age": 26, "city": "Portland", "state": "OR"} \ No newline at end of file From e4db3371f02c6f0ca81d2ef003037822cc1bb1b0 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Tue, 11 Mar 2025 13:33:03 +0000 Subject: [PATCH 082/126] update protocol code for clp connector --- .../main/types/PrestoToVeloxConnector.cpp | 1 + .../presto_cpp/presto_protocol/Makefile | 9 ++ .../connector/clp/ClpConnectorProtocol.h | 29 ++++ .../clp/presto_protocol-json-cpp.mustache | 146 ++++++++++++++++++ .../clp/presto_protocol-json-hpp.mustache | 68 ++++++++ .../connector/clp/presto_protocol_clp.cpp | 146 ++++++++++++++++++ .../connector/clp/presto_protocol_clp.h | 96 ++++++++++++ .../connector/clp/presto_protocol_clp.json | 121 +++++++++++++++ .../connector/clp/presto_protocol_clp.yml | 39 +++++ .../clp/special/ClpColumnHandle.hpp.inc | 33 ++++ .../clp/special/ClpTransactionHandle.cpp.inc | 30 ++++ .../clp/special/ClpTransactionHandle.hpp.inc | 28 ++++ .../presto_protocol/presto_protocol.h | 1 + 13 files changed, 747 insertions(+) create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json 
create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc create mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index 35116ff9b972d..1cf49ff44bc84 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -13,6 +13,7 @@ */ #include "presto_cpp/main/types/PrestoToVeloxConnector.h" +#include "presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h" #include "presto_cpp/presto_protocol/connector/hive/HiveConnectorProtocol.h" #include "presto_cpp/presto_protocol/connector/iceberg/IcebergConnectorProtocol.h" #include "presto_cpp/presto_protocol/connector/tpch/TpchConnectorProtocol.h" diff --git a/presto-native-execution/presto_cpp/presto_protocol/Makefile b/presto-native-execution/presto_cpp/presto_protocol/Makefile index 3ee2b4e802b81..775fd20d63f9b 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/Makefile +++ b/presto-native-execution/presto_cpp/presto_protocol/Makefile @@ -45,14 +45,23 @@ presto_protocol-cpp: presto_protocol-json chevron -d connector/tpch/presto_protocol_tpch.json connector/tpch/presto_protocol-json-hpp.mustache >> connector/tpch/presto_protocol_tpch.h clang-format -style=file -i connector/tpch/presto_protocol_tpch.h connector/tpch/presto_protocol_tpch.cpp + # build clp connector related structs + echo "// DO NOT EDIT : This file is generated by chevron" > connector/clp/presto_protocol_clp.cpp + chevron -d 
connector/clp/presto_protocol_clp.json connector/clp/presto_protocol-json-cpp.mustache >> connector/clp/presto_protocol_clp.cpp + echo "// DO NOT EDIT : This file is generated by chevron" > connector/clp/presto_protocol_clp.h + chevron -d connector/clp/presto_protocol_clp.json connector/clp/presto_protocol-json-hpp.mustache >> connector/clp/presto_protocol_clp.h + clang-format -style=file -i connector/clp/presto_protocol_clp.h connector/clp/presto_protocol_clp.cpp + presto_protocol-json: ./java-to-struct-json.py --config core/presto_protocol_core.yml core/special/*.java core/special/*.inc -j | jq . > core/presto_protocol_core.json ./java-to-struct-json.py --config connector/hive/presto_protocol_hive.yml connector/hive/special/*.inc -j | jq . > connector/hive/presto_protocol_hive.json ./java-to-struct-json.py --config connector/iceberg/presto_protocol_iceberg.yml connector/iceberg/special/*.inc -j | jq . > connector/iceberg/presto_protocol_iceberg.json ./java-to-struct-json.py --config connector/tpch/presto_protocol_tpch.yml connector/tpch/special/*.inc -j | jq . > connector/tpch/presto_protocol_tpch.json + ./java-to-struct-json.py --config connector/clp/presto_protocol_clp.yml connector/clp/special/*.inc -j | jq . 
> connector/clp/presto_protocol_clp.json presto_protocol.proto: presto_protocol-json pystache presto_protocol-protobuf.mustache core/presto_protocol_core.json > core/presto_protocol_core.proto pystache presto_protocol-protobuf.mustache connector/hive/presto_protocol_hive.json > connector/hive/presto_protocol_hive.proto pystache presto_protocol-protobuf.mustache connector/iceberg/presto_protocol_iceberg.json > connector/iceberg/presto_protocol_iceberg.proto pystache presto_protocol-protobuf.mustache connector/tpch/presto_protocol_tpch.json > connector/tpch/presto_protocol_tpch.proto + pystache presto_protocol-protobuf.mustache connector/clp/presto_protocol_clp.json > connector/clp/presto_protocol_clp.proto diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h new file mode 100644 index 0000000000000..5b1e76b4606c4 --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h @@ -0,0 +1,29 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#pragma once +#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" +#include "presto_cpp/presto_protocol/core/ConnectorProtocol.h" + +namespace facebook::presto::protocol::clp { +using ClpConnectorProtocol = ConnectorProtocolTemplate< + ClpTableHandle, + ClpTableLayoutHandle, + ClpColumnHandle, + NotImplemented, + NotImplemented, + ClpSplit, + NotImplemented, + ClpTransactionHandle, + NotImplemented>; +} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache new file mode 100644 index 0000000000000..f30beed5a875a --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache @@ -0,0 +1,146 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// presto_protocol.prolog.cpp +// + +{{#.}} +{{#comment}} +{{comment}} +{{/comment}} +{{/.}} + + +#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" +using namespace std::string_literals; + +namespace facebook::presto::protocol::clp { + +void to_json(json& j, const ClpTransactionHandle& p) { + j = json::array(); + j.push_back(p._type); + j.push_back(p.instance); +} + +void from_json(const json& j, ClpTransactionHandle& p) { + j[0].get_to(p._type); + j[1].get_to(p.instance); +} +} // namespace facebook::presto::protocol::clp +{{#.}} +{{#cinc}} +{{&cinc}} +{{/cinc}} +{{^cinc}} +{{#struct}} +namespace facebook::presto::protocol::clp { + {{#super_class}} + {{&class_name}}::{{&class_name}}() noexcept { + _type = "{{json_key}}"; + } + {{/super_class}} + + void to_json(json& j, const {{&class_name}}& p) { + j = json::object(); + {{#super_class}} + j["@type"] = "{{&json_key}}"; + {{/super_class}} + {{#fields}} + to_json_key(j, "{{&field_name}}", p.{{field_name}}, "{{&class_name}}", "{{&field_text}}", "{{&field_name}}"); + {{/fields}} + } + + void from_json(const json& j, {{&class_name}}& p) { + {{#super_class}} + p._type = j["@type"]; + {{/super_class}} + {{#fields}} + from_json_key(j, "{{&field_name}}", p.{{field_name}}, "{{&class_name}}", "{{&field_text}}", "{{&field_name}}"); + {{/fields}} + } +} +{{/struct}} +{{#enum}} +namespace facebook::presto::protocol::clp { + //Loosely copied this here from NLOHMANN_JSON_SERIALIZE_ENUM() + + // NOLINTNEXTLINE: cppcoreguidelines-avoid-c-arrays + static const std::pair<{{&class_name}}, json> + {{&class_name}}_enum_table[] = { // NOLINT: cert-err58-cpp + {{#elements}} + { {{&class_name}}::{{&element}}, "{{&element}}" }{{^_last}},{{/_last}} + {{/elements}} + }; + void to_json(json& j, const {{&class_name}}& e) + { + static_assert(std::is_enum<{{&class_name}}>::value, "{{&class_name}} must be an enum!"); + const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table),
+ [e](const std::pair<{{&class_name}}, json>& ej_pair) -> bool + { + return ej_pair.first == e; + }); + j = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->second; + } + void from_json(const json& j, {{&class_name}}& e) + { + static_assert(std::is_enum<{{&class_name}}>::value, "{{&class_name}} must be an enum!"); + const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table), + [&j](const std::pair<{{&class_name}}, json>& ej_pair) -> bool + { + return ej_pair.second == j; + }); + e = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->first; + } +} +{{/enum}} +{{#abstract}} +namespace facebook::presto::protocol::clp { + void to_json(json& j, const std::shared_ptr<{{&class_name}}>& p) { + if ( p == nullptr ) { + return; + } + String type = p->_type; + + {{#subclasses}} + if ( type == "{{&key}}" ) { + j = *std::static_pointer_cast<{{&type}}>(p); + return; + } + {{/subclasses}} + + throw TypeError(type + " no abstract type {{&class_name}} {{&key}}"); + } + + void from_json(const json& j, std::shared_ptr<{{&class_name}}>& p) { + String type; + try { + type = p->getSubclassKey(j); + } catch (json::parse_error &e) { + throw ParseError(std::string(e.what()) + " {{&class_name}} {{&key}} {{&class_name}}"); + } + + {{#subclasses}} + if ( type == "{{&key}}" ) { + std::shared_ptr<{{&type}}> k = std::make_shared<{{&type}}>(); + j.get_to(*k); + p = std::static_pointer_cast<{{&class_name}}>(k); + return; + } + {{/subclasses}} + + throw TypeError(type + " no abstract type {{&class_name}} {{&key}}"); + } +} +{{/abstract}} +{{/cinc}} +{{/.}} diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache new file mode 100644 index 0000000000000..f903bd681a5c2 --- /dev/null +++ 
b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache @@ -0,0 +1,68 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +{{#.}} +{{#comment}} +{{comment}} +{{/comment}} +{{/.}} + +#include +#include + +#include "presto_cpp/external/json/nlohmann/json.hpp" +#include "presto_cpp/presto_protocol/core/presto_protocol_core.h" + +namespace facebook::presto::protocol::clp { +struct ClpTransactionHandle : public ConnectorTransactionHandle { + String instance = {}; + }; +void to_json(json& j, const ClpTransactionHandle& p); + +void from_json(const json& j, ClpTransactionHandle& p); +} //namespace facebook::presto::protocol +{{#.}} +{{#hinc}} +{{&hinc}} +{{/hinc}} +{{^hinc}} +{{#struct}} +namespace facebook::presto::protocol::clp { + struct {{class_name}} {{#super_class}}: public {{super_class}}{{/super_class}}{ + {{#fields}} + {{#field_local}}{{#optional}}std::shared_ptr<{{/optional}}{{&field_text}}{{#optional}}>{{/optional}} {{&field_name}} = {};{{/field_local}} + {{/fields}} + + {{#super_class}} + {{class_name}}() noexcept; + {{/super_class}} + }; + void to_json(json& j, const {{class_name}}& p); + void from_json(const json& j, {{class_name}}& p); +} +{{/struct}} +{{#enum}} +namespace facebook::presto::protocol::clp { + enum class {{class_name}} { + {{#elements}} + {{&element}}{{^_last}},{{/_last}} + {{/elements}} + }; + extern void to_json(json& j, const {{class_name}}& e); + extern void 
from_json(const json& j, {{class_name}}& e); +} +{{/enum}} +{{/hinc}} +{{/.}} diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp new file mode 100644 index 0000000000000..52be07e54cc2a --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp @@ -0,0 +1,146 @@ +// DO NOT EDIT : This file is generated by chevron +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// presto_protocol.prolog.cpp +// + +// This file is generated DO NOT EDIT @generated + +#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" +using namespace std::string_literals; + +namespace facebook::presto::protocol::clp { + +void to_json(json& j, const ClpTransactionHandle& p) { + j = json::array(); + j.push_back(p._type); + j.push_back(p.instance); +} + +void from_json(const json& j, ClpTransactionHandle& p) { + j[0].get_to(p._type); + j[1].get_to(p.instance); +} +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +ClpColumnHandle::ClpColumnHandle() noexcept { + _type = "clp"; +} + +void to_json(json& j, const ClpColumnHandle& p) { + j = json::object(); + j["@type"] = "clp"; + to_json_key( + j, "columnName", p.columnName, "ClpColumnHandle", "String", "columnName"); + to_json_key( + j, + "originalColumnName", + p.originalColumnName, + "ClpColumnHandle", + "String", + "originalColumnName"); + to_json_key( + j, "columnType", p.columnType, "ClpColumnHandle", "Type", "columnType"); + to_json_key(j, "nullable", p.nullable, "ClpColumnHandle", "bool", "nullable"); +} + +void from_json(const json& j, ClpColumnHandle& p) { + p._type = j["@type"]; + from_json_key( + j, "columnName", p.columnName, "ClpColumnHandle", "String", "columnName"); + from_json_key( + j, + "originalColumnName", + p.originalColumnName, + "ClpColumnHandle", + "String", + "originalColumnName"); + from_json_key( + j, "columnType", p.columnType, "ClpColumnHandle", "Type", "columnType"); + from_json_key( + j, "nullable", p.nullable, "ClpColumnHandle", "bool", "nullable"); +} +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +ClpSplit::ClpSplit() noexcept { + _type = "clp"; +} + +void to_json(json& j, const ClpSplit& p) { + j = json::object(); + j["@type"] = "clp"; + to_json_key( + j, "schemaName", p.schemaName, "ClpSplit", "String", "schemaName"); + to_json_key(j, "tableName", p.tableName, 
"ClpSplit", "String", "tableName"); + to_json_key(j, "archiveId", p.archiveId, "ClpSplit", "String", "archiveId"); + to_json_key(j, "query", p.query, "ClpSplit", "String", "query"); +} + +void from_json(const json& j, ClpSplit& p) { + p._type = j["@type"]; + from_json_key( + j, "schemaName", p.schemaName, "ClpSplit", "String", "schemaName"); + from_json_key(j, "tableName", p.tableName, "ClpSplit", "String", "tableName"); + from_json_key(j, "archiveId", p.archiveId, "ClpSplit", "String", "archiveId"); + from_json_key(j, "query", p.query, "ClpSplit", "String", "query"); +} +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +ClpTableHandle::ClpTableHandle() noexcept { + _type = "clp"; +} + +void to_json(json& j, const ClpTableHandle& p) { + j = json::object(); + j["@type"] = "clp"; + to_json_key( + j, + "schemaTableName", + p.schemaTableName, + "ClpTableHandle", + "SchemaTableName", + "schemaTableName"); +} + +void from_json(const json& j, ClpTableHandle& p) { + p._type = j["@type"]; + from_json_key( + j, + "schemaTableName", + p.schemaTableName, + "ClpTableHandle", + "SchemaTableName", + "schemaTableName"); +} +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +ClpTableLayoutHandle::ClpTableLayoutHandle() noexcept { + _type = "clp"; +} + +void to_json(json& j, const ClpTableLayoutHandle& p) { + j = json::object(); + j["@type"] = "clp"; + to_json_key( + j, "table", p.table, "ClpTableLayoutHandle", "ClpTableHandle", "table"); + to_json_key(j, "query", p.query, "ClpTableLayoutHandle", "String", "query"); +} + +void from_json(const json& j, ClpTableLayoutHandle& p) { + p._type = j["@type"]; + from_json_key( + j, "table", p.table, "ClpTableLayoutHandle", "ClpTableHandle", "table"); + from_json_key(j, "query", p.query, "ClpTableLayoutHandle", "String", "query"); +} +} // namespace facebook::presto::protocol::clp diff --git 
a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h new file mode 100644 index 0000000000000..310f54a0bd1db --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h @@ -0,0 +1,96 @@ +// DO NOT EDIT : This file is generated by chevron +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// This file is generated DO NOT EDIT @generated + +#include +#include + +#include "presto_cpp/external/json/nlohmann/json.hpp" +#include "presto_cpp/presto_protocol/core/presto_protocol_core.h" + +namespace facebook::presto::protocol::clp { +struct ClpTransactionHandle : public ConnectorTransactionHandle { + String instance = {}; +}; +void to_json(json& j, const ClpTransactionHandle& p); + +void from_json(const json& j, ClpTransactionHandle& p); +} // namespace facebook::presto::protocol::clp +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// ClpColumnHandle is special since it needs an implementation of +// operator<(). + +namespace facebook::presto::protocol::clp { +struct ClpColumnHandle : public ColumnHandle { + String columnName = {}; + String originalColumnName = {}; + Type columnType = {}; + boolean nullable = {}; + + ClpColumnHandle() noexcept; + + bool operator<(const ColumnHandle& o) const override { + return columnName < dynamic_cast<const ClpColumnHandle&>(o).columnName; + } +}; +void to_json(json& j, const ClpColumnHandle& p); +void from_json(const json& j, ClpColumnHandle& p); +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +struct ClpSplit : public ConnectorSplit { + String schemaName = {}; + String tableName = {}; + String archiveId = {}; + std::shared_ptr<String> query = {}; + + ClpSplit() noexcept; +}; +void to_json(json& j, const ClpSplit& p); +void from_json(const json& j, ClpSplit& p); +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +struct ClpTableHandle : public ConnectorTableHandle { + SchemaTableName schemaTableName = {}; + + ClpTableHandle() noexcept; +}; +void to_json(json& j, const ClpTableHandle& p); +void from_json(const json& j, ClpTableHandle& p); +} // namespace facebook::presto::protocol::clp +namespace facebook::presto::protocol::clp { +struct ClpTableLayoutHandle : public ConnectorTableLayoutHandle { + ClpTableHandle table = {}; + std::shared_ptr<String> query = {}; + + ClpTableLayoutHandle() noexcept; +}; +void to_json(json& j, const ClpTableLayoutHandle& p); +void from_json(const json& j, ClpTableLayoutHandle& p); +} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json new file mode 100644 index 
0000000000000..67d5699d05d77 --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json @@ -0,0 +1,121 @@ +[ + { + "comment": "// This file is generated DO NOT EDIT @generated" + }, + { + "class_name": "ClpColumnHandle", + "hinc": "/*\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n// ClpColumnHandle is special since it needs an implementation of\n// operator<().\n\nnamespace facebook::presto::protocol::clp {\nstruct ClpColumnHandle : public ColumnHandle {\n String columnName = {};\n String originalColumnName = {};\n Type columnType = {};\n boolean nullable = {};\n\n ClpColumnHandle() noexcept;\n\n bool operator<(const ColumnHandle& o) const override {\n return columnName < dynamic_cast(o).columnName;\n }\n};\nvoid to_json(json& j, const ClpColumnHandle& p);\nvoid from_json(const json& j, ClpColumnHandle& p);\n} // namespace facebook::presto::protocol::clp", + "struct": true, + "fields": [ + { + "field_type": "String", + "field_name": "columnName", + "field_text": "String", + "_N": 1, + "field_local": true + }, + { + "field_type": "String", + "field_name": "originalColumnName", + "field_text": "String", + "_N": 2, + "field_local": true + }, + { + "field_type": "Type", + "field_name": "columnType", + "field_text": "Type", + "_N": 3, + "field_local": true + }, + { + "field_type": "boolean", + "field_name": "nullable", + "field_text": "bool", + "_N": 4, + "field_local": true + } + ], + "subclass": 
true, + "super_class": "ColumnHandle", + "json_key": "clp" + }, + { + "class_name": "ClpSplit", + "struct": true, + "fields": [ + { + "field_type": "String", + "field_name": "schemaName", + "field_text": "String", + "_N": 1, + "field_local": true + }, + { + "field_type": "String", + "field_name": "tableName", + "field_text": "String", + "_N": 2, + "field_local": true + }, + { + "field_type": "String", + "field_name": "archiveId", + "field_text": "String", + "_N": 3, + "field_local": true + }, + { + "field_type": "Optional", + "field_name": "query", + "field_text": "String", + "optional": true, + "_N": 4, + "field_local": true + } + ], + "subclass": true, + "super_class": "ConnectorSplit", + "json_key": "clp" + }, + { + "class_name": "ClpTableHandle", + "struct": true, + "fields": [ + { + "field_type": "SchemaTableName", + "field_name": "schemaTableName", + "field_text": "SchemaTableName", + "_N": 1, + "field_local": true + } + ], + "subclass": true, + "super_class": "ConnectorTableHandle", + "json_key": "clp" + }, + { + "class_name": "ClpTableLayoutHandle", + "struct": true, + "fields": [ + { + "field_type": "ClpTableHandle", + "field_name": "table", + "field_text": "ClpTableHandle", + "_N": 1, + "field_local": true + }, + { + "field_type": "Optional", + "field_name": "query", + "field_text": "String", + "optional": true, + "_N": 2, + "field_local": true + } + ], + "subclass": true, + "super_class": "ConnectorTableLayoutHandle", + "json_key": "clp" + } +] diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml new file mode 100644 index 0000000000000..0abb104d564e0 --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml @@ -0,0 +1,39 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +AbstractClasses: + ColumnHandle: + super: JsonEncodedSubclass + comparable: true + subclasses: + - { name: ClpColumnHandle, key: clp } + + ConnectorTableHandle: + super: JsonEncodedSubclass + subclasses: + - { name: ClpTableHandle, key: clp } + + ConnectorTableLayoutHandle: + super: JsonEncodedSubclass + subclasses: + - { name: ClpTableLayoutHandle, key: clp } + + ConnectorSplit: + super: JsonEncodedSubclass + subclasses: + - { name: ClpSplit, key: clp } + +JavaClasses: + - presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java + - presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java + - presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java + - presto-clp/src/main/java/com/yscope/presto/ClpSplit.java diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc new file mode 100644 index 0000000000000..bb076b8ff23db --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// ClpColumnHandle is special since it needs an implementation of +// operator<(). + +namespace facebook::presto::protocol::clp { +struct ClpColumnHandle : public ColumnHandle { + String columnName = {}; + String originalColumnName = {}; + Type columnType = {}; + boolean nullable = {}; + + ClpColumnHandle() noexcept; + + bool operator<(const ColumnHandle& o) const override { + return columnName < dynamic_cast<const ClpColumnHandle&>(o).columnName; + } +}; +void to_json(json& j, const ClpColumnHandle& p); +void from_json(const json& j, ClpColumnHandle& p); +} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc new file mode 100644 index 0000000000000..a753f42ab61f1 --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc @@ -0,0 +1,30 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// ClpTransactionHandle is special since +// the corresponding class in Java is an enum. + +namespace facebook::presto::protocol::clp { + +void to_json(json& j, const ClpTransactionHandle& p) { + j = json::array(); + j.push_back(p._type); + j.push_back(p.instance); +} + +void from_json(const json& j, ClpTransactionHandle& p) { + j[0].get_to(p._type); + j[1].get_to(p.instance); +} +} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc new file mode 100644 index 0000000000000..fc873366389eb --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// ClpTransactionHandle is special since +// the corresponding class in Java is an enum. 
+ +namespace facebook::presto::protocol::clp { + +struct ClpTransactionHandle : public ConnectorTransactionHandle { + String instance = {}; +}; + +void to_json(json& j, const ClpTransactionHandle& p); + +void from_json(const json& j, ClpTransactionHandle& p); + +} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h index dd94975e3760d..876567fe12307 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h +++ b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h @@ -16,6 +16,7 @@ // DEPRECATED: This file is deprecated and will be removed in future versions. +#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" #include "presto_cpp/presto_protocol/connector/hive/presto_protocol_hive.h" #include "presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h" #include "presto_cpp/presto_protocol/connector/tpch/presto_protocol_tpch.h" From 20e761349445513135c2cc4c057e6bed72d96329 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Tue, 11 Mar 2025 15:03:59 +0000 Subject: [PATCH 083/126] update presto-clp pom.xml --- presto-clp/pom.xml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 3c392da17fec5..69f042352b62c 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -6,16 +6,15 @@ com.facebook.presto presto-root - 0.290-SNAPSHOT + 0.292-SNAPSHOT com.yscope.presto presto-clp - Presto CLP connector - presto-plugin + presto-clp + Presto - CLP connector - UTF-8 ${project.parent.basedir} From 39fe294058d65b7a8a479e7691a0f79401d35602 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Wed, 12 Mar 2025 20:46:21 +0000 Subject: [PATCH 084/126] fix build issues --- presto-clp/pom.xml | 2 +- .../src/main/java/com/yscope/presto/ClpPlanOptimizer.java | 3 ++- 
.../src/test/java/com/yscope/presto/TestClpQueryBase.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 69f042352b62c..3bcf8034da2b5 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -11,8 +11,8 @@ com.yscope.presto presto-clp - presto-clp Presto - CLP connector + presto-plugin ${project.parent.basedir} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 0de5c10889a01..6ffb227d9d4ad 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -100,7 +100,8 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) tableScanNode.getAssignments(), tableScanNode.getTableConstraints(), tableScanNode.getCurrentConstraint(), - tableScanNode.getEnforcedConstraint()); + tableScanNode.getEnforcedConstraint(), + tableScanNode.getCteMaterializationInfo()); if (!remainingPredicate.isPresent()) { return newTableScanNode; } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java index 546f2459eacef..42f5f51860d30 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java @@ -19,7 +19,6 @@ import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.metadata.Metadata; import com.facebook.presto.metadata.MetadataManager; -import com.facebook.presto.metadata.SessionPropertyManager; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.WarningCollector; @@ -46,6 +45,7 @@ import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static 
com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; +import static com.facebook.presto.metadata.SessionPropertyManager.createTestingSessionPropertyManager; import static com.facebook.presto.sql.analyzer.ExpressionAnalyzer.getExpressionTypes; import static com.facebook.presto.testing.TestingConnectorSession.SESSION; import static java.util.stream.Collectors.toMap; @@ -76,7 +76,7 @@ protected static class SessionHolder public SessionHolder() { connectorSession = SESSION; - session = TestingSession.testSessionBuilder(new SessionPropertyManager(new SystemSessionProperties().getSessionProperties())).build(); + session = TestingSession.testSessionBuilder(createTestingSessionPropertyManager(new SystemSessionProperties().getSessionProperties())).build(); } public ConnectorSession getConnectorSession() From 2af610628dc34fd133f1e5fa8e7c40fb9380b891 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Thu, 13 Mar 2025 14:57:07 +0000 Subject: [PATCH 085/126] refactor metadata, add ClpMySQLSplitProvider implementation and unit tests --- .../java/com/yscope/presto/ClpClient.java | 31 +--- .../main/java/com/yscope/presto/ClpSplit.java | 30 ++-- .../com/yscope/presto/ClpSplitManager.java | 11 -- .../presto/metadata/ClpMetadataProvider.java | 4 +- .../metadata/ClpMySQLMetadataProvider.java | 23 ++- .../presto/split/ClpMySQLSplitProvider.java | 81 ++++++----- .../com/yscope/presto/TestClpMetadata.java | 54 +++++-- .../java/com/yscope/presto/TestClpSplit.java | 133 ++++++++++++++++++ 8 files changed, 249 insertions(+), 118 deletions(-) create mode 100644 presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 1dbf163edc17a..e1e36a6c00671 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -69,26 +69,26 @@ public ClpClient(ClpConfig 
config) this.columnHandleCache = CacheBuilder.newBuilder() .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) - .build(CacheLoader.from(this::loadTableSchema)); + .build(CacheLoader.from(this::loadColumnHandles)); this.tableNameCache = CacheBuilder.newBuilder() .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) - .build(CacheLoader.from(this::loadTable)); + .build(CacheLoader.from(this::loadTableNames)); } - public Set loadTableSchema(SchemaTableName schemaTableName) + public Set loadColumnHandles(SchemaTableName schemaTableName) { - Set columnHandles = clpMetadataProvider.listTableSchema(schemaTableName); + Set columnHandles = clpMetadataProvider.listColumnHandles(schemaTableName); if (!config.isPolymorphicTypeEnabled()) { return columnHandles; } return handlePolymorphicType(columnHandles); } - public Set loadTable(String schemaName) + public Set loadTableNames(String schemaName) { - return clpMetadataProvider.listTables(schemaName); + return clpMetadataProvider.listTableNames(schemaName); } public Set listTables(String schemaName) @@ -99,25 +99,6 @@ public Set listTables(String schemaName) public List listSplits(ClpTableLayoutHandle layoutHandle) { return clpSplitProvider.listSplits(layoutHandle); -// if (archiveSource == ClpConfig.ArchiveSource.LOCAL) { -// Path tableDir = Paths.get(config.getClpArchiveDir(), tableName); -// if (!Files.exists(tableDir) || !Files.isDirectory(tableDir)) { -// return ImmutableList.of(); -// } -// -// try (DirectoryStream stream = Files.newDirectoryStream(tableDir)) { -// ImmutableList.Builder archiveIds = ImmutableList.builder(); -// for (Path path : stream) { -// if (Files.isDirectory(path)) { -// archiveIds.add(path.getFileName().toString()); -// } -// } -// return archiveIds.build(); -// } -// catch (Exception e) { -// return ImmutableList.of(); -// } -// } } public Set 
listColumns(SchemaTableName schemaTableName) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java index f13fbb80bae20..2c4ba9b89434c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java @@ -16,6 +16,7 @@ import com.facebook.presto.spi.ConnectorSplit; import com.facebook.presto.spi.HostAddress; import com.facebook.presto.spi.NodeProvider; +import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.schedule.NodeSelectionStrategy; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -31,40 +32,31 @@ public class ClpSplit implements ConnectorSplit { - private final String schemaName; - private final String tableName; - private final String archiveId; + private final SchemaTableName schemaTableName; + private final String archivePath; private final Optional query; @JsonCreator - public ClpSplit(@JsonProperty("schemaName") @Nullable String schemaName, - @JsonProperty("tableName") @Nullable String tableName, - @JsonProperty("archiveId") @Nullable String archiveId, + public ClpSplit(@JsonProperty("schemaTableName") @Nullable SchemaTableName schemaTableName, + @JsonProperty("archivePath") @Nullable String archivePath, @JsonProperty("query") Optional query) { - this.schemaName = schemaName; - this.tableName = tableName; - this.archiveId = archiveId; + this.schemaTableName = schemaTableName; + this.archivePath = archivePath; this.query = query; } @JsonProperty @Nullable - public String getSchemaName() + public SchemaTableName getSchemaTableName() { - return schemaName; + return schemaTableName; } @JsonProperty - public String getTableName() + public String getArchivePath() { - return tableName; - } - - @JsonProperty - public String getArchiveId() - { - return archiveId; + return archivePath; } @JsonProperty diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index 0efc97d82addc..23aa5a79dc7b6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -40,17 +40,6 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand SplitSchedulingContext splitSchedulingContext) { ClpTableLayoutHandle layoutHandle = (ClpTableLayoutHandle) layout; -// ClpTableHandle tableHandle = layoutHandle.getTable(); -// if (!clpClient.listTables(tableHandle.getSchemaTableName().getSchemaName()).contains(tableHandle.getTableName())) { -// throw new RuntimeException("Table no longer exists: " + tableHandle.getTableName()); -// } return new FixedSplitSource(clpClient.listSplits(layoutHandle)); -// return new FixedSplitSource(clpClient.listArchiveIds(tableHandle.getTableName()) -// .stream() -// .map(archiveId -> new ClpSplit("default", -// tableHandle.getTableName(), -// archiveId, -// layoutHandle.getQuery())) -// .collect(Collectors.toList())); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java index 4f9568b2d0346..362b49a40dd2e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java @@ -21,7 +21,7 @@ public interface ClpMetadataProvider { // TODO(Rui): Think about if it is necessary to return a set of ClpColumnHandle instead of a list of ClpColumnHandle - public Set listTableSchema(SchemaTableName schemaTableName); + public Set listColumnHandles(SchemaTableName schemaTableName); - public Set listTables(String schema); + public Set listTableNames(String schema); } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java 
b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index b944975d8ff6e..28f66db38e593 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -28,7 +28,6 @@ import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; -import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.util.HashSet; @@ -41,7 +40,8 @@ public class ClpMySQLMetadataProvider public static final String COLUMN_METADATA_PREFIX = "column_metadata_"; private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM %s" + COLUMN_METADATA_PREFIX + "%s"; - private static final String QUERY_SHOW_TABLES = "SHOW TABLES"; + private static final String TABLE_METADATA_TABLE_SUFFIX = "table_metadata"; + private static final String QUERY_SELECT_TABLES = "SELECT table_name FROM %s" + TABLE_METADATA_TABLE_SUFFIX; private final ClpConfig config; @@ -84,7 +84,7 @@ private Type mapColumnType(byte type) } @Override - public Set listTableSchema(SchemaTableName schemaTableName) + public Set listColumnHandles(SchemaTableName schemaTableName) { Set columnHandles = new HashSet<>(); String query = String.format(QUERY_SELECT_COLUMNS, config.getMetadataTablePrefix(), schemaTableName.getTableName()); @@ -101,32 +101,27 @@ public Set listTableSchema(SchemaTableName schemaTableName) } } catch (SQLException e) { - log.error("Failed to load table schema for: " + schemaTableName.getTableName(), e); + log.error("Failed to load table schema for %s: %s" + schemaTableName.getTableName(), e); } return columnHandles; } @Override - public Set listTables(String schema) + public Set listTableNames(String schema) { Set tableNames = new HashSet<>(); + String query = String.format(QUERY_SELECT_TABLES, config.getMetadataTablePrefix()); try (Connection connection = getConnection(); Statement statement = 
connection.createStatement(); - ResultSet resultSet = statement.executeQuery(QUERY_SHOW_TABLES)) { - ResultSetMetaData metaData = resultSet.getMetaData(); - String tableColumnName = metaData.getColumnName(1); + ResultSet resultSet = statement.executeQuery(query)) { while (resultSet.next()) { - String tableName = resultSet.getString(tableColumnName); - if (tableName.startsWith(config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX)) { - tableNames.add(tableName.substring((config.getMetadataTablePrefix() + COLUMN_METADATA_PREFIX).length())); - } + tableNames.add(resultSet.getString("table_name")); } } catch (SQLException e) { - log.error("Failed to load table names", e); + log.error("Failed to load table names: %s", e); } - return tableNames; } } diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java index 498eac63161c2..13f700281aca8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java @@ -14,13 +14,18 @@ package com.yscope.presto.split; import com.facebook.airlift.log.Logger; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableList; import com.yscope.presto.ClpConfig; import com.yscope.presto.ClpSplit; import com.yscope.presto.ClpTableLayoutHandle; import java.sql.Connection; import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; public class ClpMySQLSplitProvider @@ -28,10 +33,10 @@ public class ClpMySQLSplitProvider { private static final Logger log = Logger.get(ClpMySQLSplitProvider.class); - private static final String ARCHIVE_TABLE_SUFFIX = "archives"; - private static final String TABLE_METADATA_TABLE_SUFFIX = "tables"; - private static final String 
QUERY_SELECT_ARCHIVE_IDS = "SELECT id FROM %s" + ARCHIVE_TABLE_SUFFIX; - private static final String QUERY_SELECT_TABLE_METADATA = "SELECT * FROM %s" + TABLE_METADATA_TABLE_SUFFIX + " WHERE AND table_name = ?"; + private static final String ARCHIVE_TABLE_SUFFIX = "_archives"; + private static final String TABLE_METADATA_TABLE_SUFFIX = "table_metadata"; + private static final String QUERY_SELECT_ARCHIVE_IDS = "SELECT id FROM %s%s" + ARCHIVE_TABLE_SUFFIX; + private static final String QUERY_SELECT_TABLE_METADATA = "SELECT table_path FROM %s" + TABLE_METADATA_TABLE_SUFFIX + " WHERE table_name = '%s'"; private final ClpConfig config; @@ -52,38 +57,46 @@ private Connection getConnection() throws SQLException } @Override - // TODO(Rui): This method is not complete yet public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) { -// List splits = new ArrayList<>(); -// String tableName = clpTableLayoutHandle.getTable().getSchemaTableName().getTableName(); -// String query = String.format(QUERY_SELECT_TABLE_METADATA, config.getMetadataTablePrefix()); -// try (Connection connection = getConnection(); -// PreparedStatement statement = connection.prepareStatement(query)) { -// statement.setString(1, schemaTableName.getTableName()); -// ResultSet resultSet = statement.executeQuery(); -// while (resultSet.next()) { -// String archiveId = resultSet.getString("archive_id"); -// } -// } -// catch (SQLException e) { -// log.error("Failed to retrieve table metadata", e); -// } -// -// List archiveIds = new ArrayList<>(); -// String query = String.format(QUERY_SELECT_ARCHIVE_IDS, config.getMetadataTablePrefix()); -// -// try (Connection connection = getConnection(); -// PreparedStatement statement = connection.prepareStatement(query); -// ResultSet resultSet = statement.executeQuery()) { -// -// while (resultSet.next()) { -// archiveIds.add(resultSet.getString("id")); -// } -// } catch (SQLException e) { -// log.error("Failed to retrieve archive IDs", e); -// } + List splits = 
new ArrayList<>(); + SchemaTableName tableSchemaName = clpTableLayoutHandle.getTable().getSchemaTableName(); + String tableName = tableSchemaName.getTableName(); - return null; + String tablePathQuery = String.format(QUERY_SELECT_TABLE_METADATA, config.getMetadataTablePrefix(), tableName); + String archivePathQuery = String.format(QUERY_SELECT_ARCHIVE_IDS, config.getMetadataTablePrefix(), tableName); + + try (Connection connection = getConnection()) { + // Fetch table path + String tablePath; + try (PreparedStatement statement = connection.prepareStatement(tablePathQuery); + ResultSet resultSet = statement.executeQuery()) { + if (!resultSet.next()) { + log.error("Table metadata not found for table: %s", tableName); + return ImmutableList.of(); + } + tablePath = resultSet.getString("table_path"); + } + + if (tablePath == null || tablePath.isEmpty()) { + log.error("Table path is null for table: %s", tableName); + return ImmutableList.of(); + } + + // Fetch archive IDs and create splits + try (PreparedStatement statement = connection.prepareStatement(archivePathQuery); + ResultSet resultSet = statement.executeQuery()) { + while (resultSet.next()) { + final String archiveId = resultSet.getString("id"); + final String archivePath = tablePath + "/" + archiveId; + splits.add(new ClpSplit(tableSchemaName, archivePath, clpTableLayoutHandle.getQuery())); + } + } + } + catch (SQLException e) { + log.error("Database error while processing splits for %s: %s", tableName, e); + } + + return splits; } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 860402e49c830..303aa3c206186 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -46,7 +46,6 @@ public class TestClpMetadata { private ClpMetadata metadata; - private String metadataDbUrl; private static final String TABLE_NAME = "test"; 
private static final String TABLE_SCHEMA = "default"; @@ -54,9 +53,13 @@ public class TestClpMetadata @BeforeMethod public void setUp() { - metadataDbUrl = "jdbc:h2:file:/tmp/testdb;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; - String metadataDbTablePrefix = "clp_"; - String columnMetadataTablePrefix = "column_metadata_"; + final String metadataDbUrl = "jdbc:h2:file:/tmp/metadata_testdb;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; + final String metadataDbUser = "sa"; + final String metadataDbPassword = ""; + final String metadataDbTablePrefix = "clp_"; + final String columnMetadataTablePrefix = "column_metadata_"; + final String tableMetadataSuffix = "table_metadata"; + ClpConfig config = new ClpConfig().setPolymorphicTypeEnabled(true) .setMetadataDbUrl(metadataDbUrl) .setMetadataDbUser("sa") @@ -64,12 +67,39 @@ public void setUp() .setMetadataTablePrefix(metadataDbTablePrefix); metadata = new ClpMetadata(new ClpClient(config)); - try (Connection conn = DriverManager.getConnection(metadataDbUrl, "sa", ""); + final String tableMetadataTableName = metadataDbTablePrefix + tableMetadataSuffix; + final String columnMetadataTableName = metadataDbTablePrefix + columnMetadataTablePrefix + TABLE_NAME; + + final String createTableMetadataSQL = String.format( + "CREATE TABLE IF NOT EXISTS %s (" + + " table_name VARCHAR(512) PRIMARY KEY," + + " table_path VARCHAR(1024) NOT NULL)", tableMetadataTableName); + + final String createColumnMetadataSQL = String.format( + "CREATE TABLE IF NOT EXISTS %s (" + + " name VARCHAR(512) NOT NULL," + + " type TINYINT NOT NULL," + + " PRIMARY KEY (name, type))", columnMetadataTableName); + + final String insertTableMetadataSQL = String.format( + "INSERT INTO %s (table_name, table_path) VALUES (?, ?)", tableMetadataTableName); + + final String insertColumnMetadataSQL = String.format( + "INSERT INTO %s (name, type) VALUES (?, ?)", columnMetadataTableName); + + try (Connection conn = DriverManager.getConnection(metadataDbUrl, metadataDbUser, 
metadataDbPassword); Statement stmt = conn.createStatement()) { - String createTable = "CREATE TABLE IF NOT EXISTS " + metadataDbTablePrefix + columnMetadataTablePrefix - + TABLE_NAME + " (name VARCHAR(512) NOT NULL, type TINYINT NOT NULL, PRIMARY KEY (name, type))"; - stmt.execute(createTable); + stmt.execute(createTableMetadataSQL); + stmt.execute(createColumnMetadataSQL); + + // Insert table metadata + try (PreparedStatement pstmt = conn.prepareStatement(insertTableMetadataSQL)) { + pstmt.setString(1, TABLE_NAME); + pstmt.setString(2, "/tmp/archives/" + TABLE_NAME); + pstmt.executeUpdate(); + } + // Insert column metadata in batch List> records = Arrays.asList( new Pair<>("a", ClpNodeType.Integer), new Pair<>("a", ClpNodeType.VarString), @@ -79,9 +109,7 @@ public void setUp() new Pair<>("c.d", ClpNodeType.Boolean), new Pair<>("c.e", ClpNodeType.VarString)); - String insertSQL = "INSERT INTO " + metadataDbTablePrefix + columnMetadataTablePrefix + TABLE_NAME - + " (name, type) VALUES (?, ?)"; - try (PreparedStatement pstmt = conn.prepareStatement(insertSQL)) { + try (PreparedStatement pstmt = conn.prepareStatement(insertColumnMetadataSQL)) { for (Pair record : records) { pstmt.setString(1, record.getFirst()); pstmt.setByte(2, record.getSecond().getType()); @@ -98,8 +126,8 @@ public void setUp() @AfterMethod public void tearDown() { - File dbFile = new File("/tmp/testdb.mv.db"); - File lockFile = new File("/tmp/testdb.trace.db"); // Optional, H2 sometimes creates this + File dbFile = new File("/tmp/metadata_testdb.mv.db"); + File lockFile = new File("/tmp/metadata_testdb.trace.db"); // Optional, H2 sometimes creates this if (dbFile.exists()) { dbFile.delete(); System.out.println("Deleted database file: " + dbFile.getAbsolutePath()); diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java b/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java new file mode 100644 index 0000000000000..d1e1d886965c8 --- /dev/null +++ 
b/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java @@ -0,0 +1,133 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yscope.presto; + +import com.facebook.presto.spi.SchemaTableName; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.File; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; + +@Test(singleThreaded = true) +public class TestClpSplit +{ + ClpClient client; + private static final String TABLE_NAME_1 = "test_1"; + private static final String TABLE_NAME_2 = "test_2"; + private static final String TABLE_NAME_3 = "test_3"; + private static final String TABLE_SCHEMA = "default"; + private static final List TABLE_NAME_LIST = Arrays.asList(TABLE_NAME_1, TABLE_NAME_2, TABLE_NAME_3); + private static final int NUM_SPLITS = 10; + + @BeforeMethod + public void setUp() + { + final String metadataDbUrl = "jdbc:h2:file:/tmp/split_testdb;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; + final String metadataDbUser = "sa"; + final String metadataDbPassword = ""; + final String metadataDbTablePrefix = "clp_"; + final String tableMetadataSuffix = "table_metadata"; + final String 
archiveTableSuffix = "_archives"; + + ClpConfig config = new ClpConfig().setPolymorphicTypeEnabled(true) + .setMetadataDbUrl(metadataDbUrl) + .setMetadataDbUser("sa") + .setMetadataDbPassword("") + .setMetadataTablePrefix(metadataDbTablePrefix); + client = new ClpClient(config); + + final String tableMetadataTableName = metadataDbTablePrefix + tableMetadataSuffix; + final String archiveTableFormat = metadataDbTablePrefix + "%s" + archiveTableSuffix; + + final String createTableMetadataSQL = String.format( + "CREATE TABLE IF NOT EXISTS %s (" + + " table_name VARCHAR(512) PRIMARY KEY," + + " table_path VARCHAR(1024) NOT NULL)", tableMetadataTableName); + + try (Connection conn = DriverManager.getConnection(metadataDbUrl, metadataDbUser, metadataDbPassword); + Statement stmt = conn.createStatement()) { + stmt.execute(createTableMetadataSQL); + + // Insert table metadata in batch + String insertTableMetadataSQL = String.format("INSERT INTO %s (table_name, table_path) VALUES (?, ?)", tableMetadataTableName); + try (PreparedStatement pstmt = conn.prepareStatement(insertTableMetadataSQL)) { + for (String tableName : TABLE_NAME_LIST) { + pstmt.setString(1, tableName); + pstmt.setString(2, "/tmp/archives/" + tableName); + pstmt.addBatch(); + } + pstmt.executeBatch(); + } + + // Create and populate archive tables + for (String tableName : TABLE_NAME_LIST) { + String archiveTableName = String.format(archiveTableFormat, tableName); + String createArchiveTableSQL = String.format("CREATE TABLE IF NOT EXISTS %s (id VARCHAR(128) PRIMARY KEY)", archiveTableName); + stmt.execute(createArchiveTableSQL); + + String insertArchiveTableSQL = String.format("INSERT INTO %s (id) VALUES (?)", archiveTableName); + try (PreparedStatement pstmt = conn.prepareStatement(insertArchiveTableSQL)) { + for (int i = 0; i < NUM_SPLITS; i++) { + pstmt.setString(1, "id_" + i); + pstmt.addBatch(); + } + pstmt.executeBatch(); + } + } + } + catch (SQLException e) { + fail(e.getMessage()); + } + } + + 
@AfterMethod + public void tearDown() + { + File dbFile = new File("/tmp/split_testdb.mv.db"); + File lockFile = new File("/tmp/split_testdb.trace.db"); // Optional, H2 sometimes creates this + if (dbFile.exists()) { + dbFile.delete(); + System.out.println("Deleted database file: " + dbFile.getAbsolutePath()); + } + if (lockFile.exists()) { + lockFile.delete(); + } + } + + @Test + public void testListSplits() + { + for (String tableName : TABLE_NAME_LIST) { + ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(new ClpTableHandle(new SchemaTableName(TABLE_SCHEMA, tableName)), Optional.empty()); + List splits = client.listSplits(layoutHandle); + assertEquals(splits.size(), NUM_SPLITS); + for (int i = 0; i < NUM_SPLITS; i++) { + assertEquals(splits.get(i).getArchivePath(), "/tmp/archives/" + tableName + "/id_" + i); + assertEquals(splits.get(i).getQuery(), Optional.empty()); + } + } + } +} From 3195c9fc87983373a3faec20d631d48c2c02edf2 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Mon, 17 Mar 2025 13:12:18 +0000 Subject: [PATCH 086/126] apply presto-native-execution changes --- .../connector/clp/presto_protocol_clp.cpp | 22 ++++++++++++++----- .../connector/clp/presto_protocol_clp.h | 5 ++--- .../connector/clp/presto_protocol_clp.json | 17 +++++--------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp index 52be07e54cc2a..72e1846cafe22 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp @@ -81,18 +81,28 @@ void to_json(json& j, const ClpSplit& p) { j = json::object(); j["@type"] = "clp"; to_json_key( - j, "schemaName", p.schemaName, "ClpSplit", "String", "schemaName"); - to_json_key(j, "tableName", p.tableName, "ClpSplit", 
"String", "tableName"); - to_json_key(j, "archiveId", p.archiveId, "ClpSplit", "String", "archiveId"); + j, + "schemaTableName", + p.schemaTableName, + "ClpSplit", + "SchemaTableName", + "schemaTableName"); + to_json_key( + j, "archivePath", p.archivePath, "ClpSplit", "String", "archivePath"); to_json_key(j, "query", p.query, "ClpSplit", "String", "query"); } void from_json(const json& j, ClpSplit& p) { p._type = j["@type"]; from_json_key( - j, "schemaName", p.schemaName, "ClpSplit", "String", "schemaName"); - from_json_key(j, "tableName", p.tableName, "ClpSplit", "String", "tableName"); - from_json_key(j, "archiveId", p.archiveId, "ClpSplit", "String", "archiveId"); + j, + "schemaTableName", + p.schemaTableName, + "ClpSplit", + "SchemaTableName", + "schemaTableName"); + from_json_key( + j, "archivePath", p.archivePath, "ClpSplit", "String", "archivePath"); from_json_key(j, "query", p.query, "ClpSplit", "String", "query"); } } // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h index 310f54a0bd1db..bfa29311e5641 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h @@ -65,9 +65,8 @@ void from_json(const json& j, ClpColumnHandle& p); } // namespace facebook::presto::protocol::clp namespace facebook::presto::protocol::clp { struct ClpSplit : public ConnectorSplit { - String schemaName = {}; - String tableName = {}; - String archiveId = {}; + SchemaTableName schemaTableName = {}; + String archivePath = {}; std::shared_ptr query = {}; ClpSplit() noexcept; diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json index 
67d5699d05d77..9f22728b175e8 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json @@ -45,32 +45,25 @@ "struct": true, "fields": [ { - "field_type": "String", - "field_name": "schemaName", - "field_text": "String", + "field_type": "SchemaTableName", + "field_name": "schemaTableName", + "field_text": "SchemaTableName", "_N": 1, "field_local": true }, { "field_type": "String", - "field_name": "tableName", + "field_name": "archivePath", "field_text": "String", "_N": 2, "field_local": true }, - { - "field_type": "String", - "field_name": "archiveId", - "field_text": "String", - "_N": 3, - "field_local": true - }, { "field_type": "Optional", "field_name": "query", "field_text": "String", "optional": true, - "_N": 4, + "_N": 3, "field_local": true } ], From cf6faade2e4fa79cf02f3406e6131f2b03774d67 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Mon, 17 Mar 2025 13:29:58 +0000 Subject: [PATCH 087/126] update ClpMySQLSplitProvider and ClpMySQLMetadataProvider --- .../yscope/presto/metadata/ClpMySQLMetadataProvider.java | 7 ++++++- .../com/yscope/presto/split/ClpMySQLSplitProvider.java | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index 28f66db38e593..77c1a48a32fdb 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -58,7 +58,12 @@ public ClpMySQLMetadataProvider(ClpConfig config) private Connection getConnection() throws SQLException { - return DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + Connection connection = 
DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + String dbName = config.getMetadataDbName(); + if (dbName != null && !dbName.isEmpty()) { + connection.createStatement().execute("USE " + dbName); + } + return connection; } // TODO(Rui): Consider move it to a util class diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java index 13f700281aca8..7888cc0a375bf 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java @@ -53,7 +53,12 @@ public ClpMySQLSplitProvider(ClpConfig config) private Connection getConnection() throws SQLException { - return DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + Connection connection = DriverManager.getConnection(config.getMetadataDbUrl(), config.getMetadataDbUser(), config.getMetadataDbPassword()); + String dbName = config.getMetadataDbName(); + if (dbName != null && !dbName.isEmpty()) { + connection.createStatement().execute("USE " + dbName); + } + return connection; } @Override From c9415c607c50e14ff242c158be8c0d0c466ac71b Mon Sep 17 00:00:00 2001 From: rwang22 Date: Mon, 17 Mar 2025 13:49:58 +0000 Subject: [PATCH 088/126] apply changes in PrestoToVeloxConnector.cpp --- .../main/types/PrestoToVeloxConnector.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index 1cf49ff44bc84..c5fda5b8b3ed9 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -1561,21 +1561,21 @@ std::unique_ptr 
ClpPrestoToVeloxConnector::toVeloxSplit( const protocol::ConnectorId& catalogId, const protocol::ConnectorSplit* connectorSplit) const { - auto clpSplit = dynamic_cast(connectorSplit); + auto clpSplit = dynamic_cast(connectorSplit); VELOX_CHECK_NOT_NULL( clpSplit, "Unexpected split type {}", connectorSplit->_type); return std::make_unique( catalogId, - clpSplit->schemaName, - clpSplit->tableName, - clpSplit->archiveId); + clpSplit->schemaTableName.schema, + clpSplit->schemaTableName.table, + clpSplit->archivePath); } std::unique_ptr ClpPrestoToVeloxConnector::toVeloxColumnHandle( const protocol::ColumnHandle* column, const TypeParser& typeParser) const { - auto clpColumn = dynamic_cast(column); + auto clpColumn = dynamic_cast(column); VELOX_CHECK_NOT_NULL( clpColumn, "Unexpected column handle type {}", column->_type); return std::make_unique( @@ -1593,7 +1593,7 @@ ClpPrestoToVeloxConnector::toVeloxTableHandle( std::string, std::shared_ptr>& assignments) const { auto clpLayout = - std::dynamic_pointer_cast( + std::dynamic_pointer_cast( tableHandle.connectorTableLayout); VELOX_CHECK_NOT_NULL( clpLayout, @@ -1605,7 +1605,7 @@ ClpPrestoToVeloxConnector::toVeloxTableHandle( std::unique_ptr ClpPrestoToVeloxConnector::createConnectorProtocol() const { - return std::make_unique(); + return std::make_unique(); } } // namespace facebook::presto From 0a5178cfc6ff7d4e55038c9f3e6245a614d31a00 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Mon, 17 Mar 2025 15:55:52 +0000 Subject: [PATCH 089/126] update presto-clp description and update split APIs --- presto-clp/pom.xml | 2 +- .../presto_cpp/main/types/PrestoToVeloxConnector.cpp | 3 ++- .../presto_cpp/main/types/PrestoToVeloxConnector.h | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 3bcf8034da2b5..c09c61c0a7272 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -11,7 +11,7 @@ com.yscope.presto presto-clp - Presto - CLP connector + Presto - CLP Connector 
presto-plugin diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index c5fda5b8b3ed9..c4677c38d5dbf 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -1560,7 +1560,8 @@ TpchPrestoToVeloxConnector::createConnectorProtocol() const { std::unique_ptr ClpPrestoToVeloxConnector::toVeloxSplit( const protocol::ConnectorId& catalogId, - const protocol::ConnectorSplit* connectorSplit) const { + const protocol::ConnectorSplit* connectorSplit, + const protocol::SplitContext* splitContext) const { auto clpSplit = dynamic_cast(connectorSplit); VELOX_CHECK_NOT_NULL( clpSplit, "Unexpected split type {}", connectorSplit->_type); diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h index e01ce0e43b7f0..ef3599717dbce 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h @@ -222,7 +222,8 @@ class ClpPrestoToVeloxConnector final : public PrestoToVeloxConnector { std::unique_ptr toVeloxSplit( const protocol::ConnectorId& catalogId, - const protocol::ConnectorSplit* connectorSplit) const final; + const protocol::ConnectorSplit* connectorSplit, + const protocol::SplitContext* splitContext) const final; std::unique_ptr toVeloxColumnHandle( const protocol::ColumnHandle* column, From ac0e315cbad8e5ae1ea7743923483a91a3963179 Mon Sep 17 00:00:00 2001 From: rwang22 Date: Mon, 17 Mar 2025 18:34:07 +0000 Subject: [PATCH 090/126] advance velox --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 772f4290b8cb6..4b741193b04a0 160000 --- 
a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 772f4290b8cb60262751a0143474ee2e6abf17b4 +Subproject commit 4b741193b04a0054e090e0858b68920ec4a66c82 From 2938392c887b32aa5532e24d20e4ad00016aac0e Mon Sep 17 00:00:00 2001 From: rwang22 Date: Tue, 18 Mar 2025 03:09:35 +0000 Subject: [PATCH 091/126] update presto-native-execution and velox --- presto-native-execution/presto_cpp/main/PrestoServer.cpp | 6 ++++++ .../presto_cpp/presto_protocol/presto_protocol.cpp | 1 + presto-native-execution/velox | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index f115a20f17fc6..7da3a56ee7578 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -48,6 +48,7 @@ #include "velox/common/memory/MmapAllocator.h" #include "velox/common/memory/SharedArbitrator.h" #include "velox/connectors/Connector.h" +#include "velox/connectors/clp/ClpConnector.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveDataSink.h" #include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" @@ -1185,6 +1186,11 @@ void PrestoServer::registerConnectorFactories() { velox::connector::registerConnectorFactory( std::make_shared()); } + if (!velox::connector::hasConnectorFactory( + velox::connector::clp::ClpConnectorFactory::kClpConnectorName)) { + velox::connector::registerConnectorFactory( + std::make_shared()); + } } std::vector PrestoServer::registerConnectors( diff --git a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp index c15084817a434..c21bd69857e14 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp +++ 
b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp @@ -15,6 +15,7 @@ // DEPRECATED: This file is deprecated and will be removed in future versions. +#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp" #include "presto_cpp/presto_protocol/connector/hive/presto_protocol_hive.cpp" #include "presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.cpp" #include "presto_cpp/presto_protocol/connector/tpch/presto_protocol_tpch.cpp" diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 4b741193b04a0..16932ecfc32b9 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 4b741193b04a0054e090e0858b68920ec4a66c82 +Subproject commit 16932ecfc32b94734640e44d03cbb1e5361c50c6 From cf966325795182a63ca9112f3d5dd590140f5bf8 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 02:34:37 +0000 Subject: [PATCH 092/126] revert presto-native-execution/Makefile back and clean up ClpConfig --- .../java/com/yscope/presto/ClpConfig.java | 39 ------------------- presto-native-execution/Makefile | 4 +- 2 files changed, 2 insertions(+), 41 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 09d3d8d1b44b6..999824adc026a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -48,9 +48,6 @@ public enum SplitSource private ArchiveSource archiveSource = ArchiveSource.LOCAL; // TODO(Rui): We need to add it in the example configuration files and in Velox private SplitSource splitSource = SplitSource.MYSQL; - private String clpArchiveDir; - private String s3Bucket; - private String s3KeyPrefix; public boolean isPolymorphicTypeEnabled() { @@ -183,40 +180,4 @@ public ClpConfig setSplitSource(SplitSource splitSource) this.splitSource = splitSource; return this; } - - public String 
getClpArchiveDir() - { - return clpArchiveDir; - } - - @Config("clp.archive-dir") - public ClpConfig setClpArchiveDir(String clpArchiveDir) - { - this.clpArchiveDir = clpArchiveDir; - return this; - } - - public String getS3Bucket() - { - return s3Bucket; - } - - @Config("clp.s3-bucket") - public ClpConfig setS3Bucket(String s3Bucket) - { - this.s3Bucket = s3Bucket; - return this; - } - - public String getS3KeyPrefix() - { - return s3KeyPrefix; - } - - @Config("clp.s3-key-prefix") - public ClpConfig setS3KeyPrefix(String s3KeyPrefix) - { - this.s3KeyPrefix = s3KeyPrefix; - return this; - } } diff --git a/presto-native-execution/Makefile b/presto-native-execution/Makefile index cf9a7a63ce747..f3fb5f709f4d5 100644 --- a/presto-native-execution/Makefile +++ b/presto-native-execution/Makefile @@ -14,7 +14,7 @@ BUILD_BASE_DIR=_build BUILD_DIR=release BUILD_TYPE=Release -TREAT_WARNINGS_AS_ERRORS = 0 +TREAT_WARNINGS_AS_ERRORS ?= 1 ENABLE_WALL ?= 1 NUM_THREADS ?= $(shell getconf _NPROCESSORS_CONF 2>/dev/null || echo 1) CPU_TARGET ?= "avx" @@ -94,7 +94,7 @@ release: #: Build the release version cmake-and-build: #: cmake and build without updating submodules which requires git cmake -B "$(BUILD_BASE_DIR)/$(BUILD_DIR)" $(FORCE_COLOR) $(CMAKE_FLAGS) $(EXTRA_CMAKE_FLAGS) - cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) --target presto_server -j $(NUM_THREADS) + cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) -j $(NUM_THREADS) unittest: debug #: Build with debugging and run unit tests cd $(BUILD_BASE_DIR)/debug && ctest -j $(NUM_THREADS) -VV --output-on-failure --exclude-regex velox.* From 6fabaa108fcfda433177e12da07067d71cadbeda Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 02:42:08 +0000 Subject: [PATCH 093/126] remove dependency-reduced-pom.xml --- .../dependency-reduced-pom.xml | 68 ------ .../dependency-reduced-pom.xml | 57 ----- presto-verifier/dependency-reduced-pom.xml | 216 ------------------ 3 files changed, 341 deletions(-) delete mode 100644 
presto-product-tests/dependency-reduced-pom.xml delete mode 100644 presto-testing-server-launcher/dependency-reduced-pom.xml delete mode 100644 presto-verifier/dependency-reduced-pom.xml diff --git a/presto-product-tests/dependency-reduced-pom.xml b/presto-product-tests/dependency-reduced-pom.xml deleted file mode 100644 index d01741629acfe..0000000000000 --- a/presto-product-tests/dependency-reduced-pom.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - presto-root - com.facebook.presto - 0.290-SNAPSHOT - - 4.0.0 - presto-product-tests - presto-product-tests - - - - true - src/main/resources - - presto.env - - - - src/main/resources - - presto.env - - - - - - maven-shade-plugin - - - package - - shade - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - true - executable - - - - ${main-class} - - - - - - - - - - - 2.12.2 - ${project.parent.basedir} - true - com.facebook.presto.tests.TemptoProductTestRunner - - diff --git a/presto-testing-server-launcher/dependency-reduced-pom.xml b/presto-testing-server-launcher/dependency-reduced-pom.xml deleted file mode 100644 index 7a36a589bee01..0000000000000 --- a/presto-testing-server-launcher/dependency-reduced-pom.xml +++ /dev/null @@ -1,57 +0,0 @@ - - - - presto-root - com.facebook.presto - 0.290-SNAPSHOT - - 4.0.0 - presto-testing-server-launcher - presto-testing-server-launcher - - - - maven-shade-plugin - - - package - - shade - - - true - executable - - - - ${main-class} - - - - - - - - - org.skife.maven - really-executable-jar-maven-plugin - - - package - - really-executable-jar - - - - - -Xmx1G - executable - - - - - - ${project.parent.basedir} - com.facebook.presto.server.testing.TestingPrestoServerLauncher - - diff --git a/presto-verifier/dependency-reduced-pom.xml b/presto-verifier/dependency-reduced-pom.xml deleted file mode 100644 index cda43d6a5266f..0000000000000 --- a/presto-verifier/dependency-reduced-pom.xml +++ /dev/null @@ -1,216 +0,0 @@ - - - - presto-root - com.facebook.presto - 
0.290-SNAPSHOT - - 4.0.0 - presto-verifier - presto-verifier - - - - maven-shade-plugin - - - package - - shade - - - true - executable - - - - - ${main-class} - - - - - - - - - maven-dependency-plugin - - - unpack-launcher - prepare-package - - unpack-dependencies - - - false - launcher - provided - ${project.build.directory}/dependency/launcher - - - - - - maven-assembly-plugin - - - bin - package - - single - - - - tar.gz - - - src/main/assembly/presto-verifier.xml - - presto-verifier-${project.version} - false - - - - - - - - - org.jetbrains - annotations - 19.0.0 - provided - - - com.facebook.presto - presto-main - 0.290-SNAPSHOT - test-jar - test - - - com.facebook.presto - presto-memory - 0.290-SNAPSHOT - test - - - com.facebook.presto - presto-tests - 0.290-SNAPSHOT - test - - - presto-blackhole - com.facebook.presto - - - presto-function-namespace-managers - com.facebook.presto - - - tpch - io.airlift.tpch - - - h2 - com.h2database - - - - - com.facebook.presto - presto-tpch - 0.290-SNAPSHOT - test - - - tpch - io.airlift.tpch - - - - - com.facebook.airlift - testing - 0.215 - test - - - com.facebook.presto - testing-mysql-server-5 - 0.6 - test - - - com.facebook.presto - testing-mysql-server-base - 0.6 - test - - - command - io.airlift - - - - - org.testng - testng - 7.5 - test - - - junit - junit - - - guice - com.google.inject - - - jcommander - com.beust - - - jquery - org.webjars - - - - - org.assertj - assertj-core - 3.8.0 - test - - - com.facebook.presto - presto-testng-services - 0.290-SNAPSHOT - test - - - com.facebook.airlift - launcher - 0.215 - tar.gz - bin - provided - - - com.facebook.airlift - launcher - 0.215 - tar.gz - properties - provided - - - - ${project.artifactId} - ${project.parent.basedir} - com.facebook.presto.verifier.PrestoVerifier - - From 19fdd85c4319c6c89dbaf7aabe628845ef7d3bc1 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 02:44:11 +0000 Subject: [PATCH 094/126] revert gitignore back --- .gitignore | 4 +--- 1 
file changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 8b83d8075e311..a4512f9f794d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,7 @@ *.iml *.ipr *.iws -target/ -etc/ -data/ +target /var /*/var/ /presto-product-tests/**/var/ From 27a3749f07b9aeb86719458ce005bdbbda758d8c Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 02:45:47 +0000 Subject: [PATCH 095/126] remove dependency-reduced-pom.xml --- .../dependency-reduced-pom.xml | 89 -------------- presto-cli/dependency-reduced-pom.xml | 111 ------------------ 2 files changed, 200 deletions(-) delete mode 100644 presto-benchmark-driver/dependency-reduced-pom.xml delete mode 100644 presto-cli/dependency-reduced-pom.xml diff --git a/presto-benchmark-driver/dependency-reduced-pom.xml b/presto-benchmark-driver/dependency-reduced-pom.xml deleted file mode 100644 index 7b41f582da5dd..0000000000000 --- a/presto-benchmark-driver/dependency-reduced-pom.xml +++ /dev/null @@ -1,89 +0,0 @@ - - - - presto-root - com.facebook.presto - 0.290-SNAPSHOT - - 4.0.0 - presto-benchmark-driver - presto-benchmark-driver - - - - maven-shade-plugin - - - package - - shade - - - true - executable - - - - ${main-class} - - - - - - - - - org.skife.maven - really-executable-jar-maven-plugin - - - package - - really-executable-jar - - - - - -Xmx1G - executable - - - - - - - com.facebook.presto - presto-testng-services - 0.290-SNAPSHOT - test - - - org.testng - testng - 7.5 - test - - - junit - junit - - - guice - com.google.inject - - - jcommander - com.beust - - - jquery - org.webjars - - - - - - ${project.parent.basedir} - com.facebook.presto.benchmark.driver.PrestoBenchmarkDriver - - diff --git a/presto-cli/dependency-reduced-pom.xml b/presto-cli/dependency-reduced-pom.xml deleted file mode 100644 index e7fc4f4cd553e..0000000000000 --- a/presto-cli/dependency-reduced-pom.xml +++ /dev/null @@ -1,111 +0,0 @@ - - - - presto-root - com.facebook.presto - 0.290-SNAPSHOT - - 4.0.0 - presto-cli - 
presto-cli - - - - maven-shade-plugin - - - package - - shade - - - true - executable - - - - ${main-class} - - - - - - - - - org.basepom.maven - duplicate-finder-maven-plugin - - - - org.fusesource.jansi - jansi - - - - - - org.skife.maven - really-executable-jar-maven-plugin - - - package - - really-executable-jar - - - - - -Xmx1G - executable - - - - - - - org.testng - testng - 7.5 - test - - - junit - junit - - - guice - com.google.inject - - - jcommander - com.beust - - - jquery - org.webjars - - - - - com.squareup.okhttp3 - mockwebserver - 3.9.0 - test - - - bcprov-jdk15on - org.bouncycastle - - - junit - junit - - - - - - ${project.parent.basedir} - com.facebook.presto.cli.Presto - - From e173b0a7a3f44d2d96a8b6d6760530735148df72 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 13:45:07 +0000 Subject: [PATCH 096/126] change set to list for metadata --- .../java/com/yscope/presto/ClpClient.java | 20 +++++++++---------- .../presto/metadata/ClpMetadataProvider.java | 7 +++---- .../metadata/ClpMySQLMetadataProvider.java | 12 +++++------ 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index e1e36a6c00671..8b6821f84af50 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -28,10 +28,8 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; @@ -41,8 +39,8 @@ public class ClpClient private static final Logger log = Logger.get(ClpClient.class); private final ClpConfig config; - private final LoadingCache> columnHandleCache; - private final LoadingCache> tableNameCache; + private final LoadingCache> columnHandleCache; + private final LoadingCache> 
tableNameCache; private final ClpMetadataProvider clpMetadataProvider; private final ClpSplitProvider clpSplitProvider; @@ -77,21 +75,21 @@ public ClpClient(ClpConfig config) .build(CacheLoader.from(this::loadTableNames)); } - public Set loadColumnHandles(SchemaTableName schemaTableName) + public List loadColumnHandles(SchemaTableName schemaTableName) { - Set columnHandles = clpMetadataProvider.listColumnHandles(schemaTableName); + List columnHandles = clpMetadataProvider.listColumnHandles(schemaTableName); if (!config.isPolymorphicTypeEnabled()) { return columnHandles; } return handlePolymorphicType(columnHandles); } - public Set loadTableNames(String schemaName) + public List loadTableNames(String schemaName) { return clpMetadataProvider.listTableNames(schemaName); } - public Set listTables(String schemaName) + public List listTables(String schemaName) { return tableNameCache.getUnchecked(schemaName); } @@ -101,15 +99,15 @@ public List listSplits(ClpTableLayoutHandle layoutHandle) return clpSplitProvider.listSplits(layoutHandle); } - public Set listColumns(SchemaTableName schemaTableName) + public List listColumns(SchemaTableName schemaTableName) { return columnHandleCache.getUnchecked(schemaTableName); } - private Set handlePolymorphicType(Set columnHandles) + private List handlePolymorphicType(List columnHandles) { Map> columnNameToColumnHandles = new HashMap<>(); - HashSet polymorphicColumnHandles = new HashSet<>(); + List polymorphicColumnHandles = new ArrayList<>(); for (ClpColumnHandle columnHandle : columnHandles) { columnNameToColumnHandles.computeIfAbsent(columnHandle.getColumnName(), k -> new ArrayList<>()) diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java index 362b49a40dd2e..c400dc8864d5d 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java +++ 
b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java @@ -16,12 +16,11 @@ import com.facebook.presto.spi.SchemaTableName; import com.yscope.presto.ClpColumnHandle; -import java.util.Set; +import java.util.List; public interface ClpMetadataProvider { - // TODO(Rui): Think about if it is necessary to return a set of ClpColumnHandle instead of a list of ClpColumnHandle - public Set listColumnHandles(SchemaTableName schemaTableName); + public List listColumnHandles(SchemaTableName schemaTableName); - public Set listTableNames(String schema); + public List listTableNames(String schema); } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index 77c1a48a32fdb..d7de027d8d28f 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -30,8 +30,8 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; -import java.util.HashSet; -import java.util.Set; +import java.util.ArrayList; +import java.util.List; public class ClpMySQLMetadataProvider implements ClpMetadataProvider @@ -89,9 +89,9 @@ private Type mapColumnType(byte type) } @Override - public Set listColumnHandles(SchemaTableName schemaTableName) + public List listColumnHandles(SchemaTableName schemaTableName) { - Set columnHandles = new HashSet<>(); + List columnHandles = new ArrayList<>(); String query = String.format(QUERY_SELECT_COLUMNS, config.getMetadataTablePrefix(), schemaTableName.getTableName()); try (Connection connection = getConnection(); @@ -112,9 +112,9 @@ public Set listColumnHandles(SchemaTableName schemaTableName) } @Override - public Set listTableNames(String schema) + public List listTableNames(String schema) { - Set tableNames = new HashSet<>(); + List tableNames = new ArrayList<>(); String 
query = String.format(QUERY_SELECT_TABLES, config.getMetadataTablePrefix()); try (Connection connection = getConnection(); From 7e0dbf52d935bdefc509ba273f1f2a2f1fde3969 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 13:52:39 +0000 Subject: [PATCH 097/126] apply coderabbit suggestions --- presto-clp/src/main/java/com/yscope/presto/ClpModule.java | 3 --- .../com/yscope/presto/metadata/ClpMySQLMetadataProvider.java | 1 + .../java/com/yscope/presto/split/ClpMySQLSplitProvider.java | 1 + presto-native-execution/presto_cpp/presto_protocol/Makefile | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java index acce28a23da0f..311a9b259e01b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java @@ -28,10 +28,7 @@ public void configure(Binder binder) binder.bind(ClpConnector.class).in(Scopes.SINGLETON); binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); -// binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); binder.bind(ClpClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); } - - // TODO: type deserializer } diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index d7de027d8d28f..a3b89842756a2 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -52,6 +52,7 @@ public ClpMySQLMetadataProvider(ClpConfig config) } catch (ClassNotFoundException e) { log.error(e, "Failed to load MySQL JDBC driver"); + throw new RuntimeException("MySQL JDBC driver not found", e); } this.config = config; } diff --git 
a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java index 7888cc0a375bf..0331d93aa1d1a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java @@ -47,6 +47,7 @@ public ClpMySQLSplitProvider(ClpConfig config) } catch (ClassNotFoundException e) { log.error(e, "Failed to load MySQL JDBC driver"); + throw new RuntimeException("MySQL JDBC driver not found", e); } this.config = config; } diff --git a/presto-native-execution/presto_cpp/presto_protocol/Makefile b/presto-native-execution/presto_cpp/presto_protocol/Makefile index 775fd20d63f9b..204778d71fa33 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/Makefile +++ b/presto-native-execution/presto_cpp/presto_protocol/Makefile @@ -64,4 +64,4 @@ presto_protocol.proto: presto_protocol-json pystache presto_protocol-protobuf.mustache connector/hive/presto_protocol_hive.json > connector/hive/presto_protocol_hive.proto pystache presto_protocol-protobuf.mustache connector/iceberg/presto_protocol_iceberg.json > connector/iceberg/presto_protocol_iceberg.proto pystache presto_protocol-protobuf.mustache connector/tpch/presto_protocol_tpch.json > connector/tpch/presto_protocol_tpch.proto - pystache presto_protocol-protobuf.mustache connector/clp/presto_protocol_clp.json > connector/clp/presto_protocol_tpch.proto + pystache presto_protocol-protobuf.mustache connector/clp/presto_protocol_clp.json > connector/clp/presto_protocol_clp.proto From 65ea0a8caef5fc176c833775a310f40dd04f1fb6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 14:53:33 +0000 Subject: [PATCH 098/126] remove unused dependencies in pom.xml --- presto-clp/pom.xml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index c09c61c0a7272..63abb7bc987ec 100644 --- 
a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -50,11 +50,6 @@ guice - - - - - com.google.code.findbugs jsr305 @@ -77,16 +72,6 @@ provided - - - - - - - - - - com.facebook.presto presto-spi From b764f9c39f6aa028436def9179ce1ff13b61bcaf Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 20 Mar 2025 22:16:04 +0000 Subject: [PATCH 099/126] allow like predicate to be transformed to substr match and optimize it in clp connector optimize IS NULL case handle other expression case handle reversed operands for simple operators --- .../java/com/yscope/presto/ClpExpression.java | 5 + .../presto/ClpFilterToKqlConverter.java | 335 ++++++++++++++++-- .../yscope/presto/TestClpPlanOptimizer.java | 79 ++++- .../SqlToRowExpressionTranslator.java | 8 +- 4 files changed, 380 insertions(+), 47 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java b/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java index 64fde925be6a3..525a8bb201857 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java @@ -28,6 +28,11 @@ public ClpExpression(Optional definition, Optional remain this.remainingExpression = remainingExpression; } + public ClpExpression() + { + this (Optional.empty(), Optional.empty()); + } + public ClpExpression(String definition) { this(Optional.of(definition), Optional.empty()); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index dc081b7ad682e..3035e3adcdb22 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -36,17 +36,23 @@ import java.util.Optional; import java.util.Set; +import static com.facebook.presto.common.function.OperatorType.EQUAL; +import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; +import 
static com.facebook.presto.common.function.OperatorType.GREATER_THAN_OR_EQUAL; +import static com.facebook.presto.common.function.OperatorType.LESS_THAN; +import static com.facebook.presto.common.function.OperatorType.LESS_THAN_OR_EQUAL; +import static com.facebook.presto.common.function.OperatorType.NOT_EQUAL; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; import static com.yscope.presto.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static java.util.Objects.requireNonNull; -// TODO(Rui): Correctly handle escaping of special characters in LIKE expressions -// TODO(Rui): Consider whether it handles `is NULL` and `is NOT NULL` expressions correctly +// TODO(Rui): Correctly handle escaping of special characters in LIKE expressions (LIKE 'a%b' ESCAPE 'a') public class ClpFilterToKqlConverter implements RowExpressionVisitor { - private static final Set LOGICAL_BINARY_OPS_FILTER = ImmutableSet.of("=", "<", "<=", ">", ">=", "<>"); + private static final Set LOGICAL_BINARY_OPS_FILTER = + ImmutableSet.of(EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL); private final StandardFunctionResolution standardFunctionResolution; private final FunctionMetadataManager functionMetadataManager; @@ -128,6 +134,7 @@ else if (!remainingExpressions.isEmpty()) { remainingExpressions))); } } + // Remove the last " AND " from the query return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"); } @@ -144,6 +151,7 @@ private ClpExpression handleOr(SpecialFormExpression node) queryBuilder.append(expression.getDefinition().get()); queryBuilder.append(" OR "); } + // Remove the last " OR " from the query return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); } @@ -163,40 +171,59 @@ private ClpExpression handleIn(SpecialFormExpression node) String literalString = getLiteralString(literal); 
queryBuilder.append(variableName).append(": "); if (literal.getType() instanceof VarcharType) { - queryBuilder.append("\""); - queryBuilder.append(literalString); - queryBuilder.append("\""); + queryBuilder.append("\"").append(literalString).append("\""); } else { queryBuilder.append(literalString); } queryBuilder.append(" OR "); } + // Remove the last " OR " from the query return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); } + private ClpExpression handleIsNull(SpecialFormExpression node) + { + if (node.getArguments().size() != 1) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "IS NULL operator must have exactly one argument. Received: " + node); + } + + if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { + return new ClpExpression(node); + } + + ClpExpression expression = node.getArguments().get(0).accept(this, null); + if (!expression.getDefinition().isPresent()) { + return new ClpExpression(node); + } + + String variableName = expression.getDefinition().get(); + return new ClpExpression(String.format("NOT %s: *", variableName)); + } + + // Only handles the case where there is a SQL wildcard in the middle of the string private ClpExpression handleLike(CallExpression node) { if (node.getArguments().size() != 2) { throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "LIKE operator must have exactly two arguments. 
Received: " + node); } - if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { return new ClpExpression(node); } String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); RowExpression argument = node.getArguments().get(1); + + String pattern; if (argument instanceof ConstantExpression) { ConstantExpression literal = (ConstantExpression) argument; - String literalString = getLiteralString(literal); - return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*").replace("_", "?") + "\""); + pattern = getLiteralString(literal); } else if (argument instanceof CallExpression) { CallExpression callExpression = (CallExpression) argument; - FunctionHandle functionHandle = callExpression.getFunctionHandle(); - if (!standardFunctionResolution.isCastFunction(functionHandle)) { + if (!standardFunctionResolution.isCastFunction(callExpression.getFunctionHandle())) { return new ClpExpression(node); } if (callExpression.getArguments().size() != 1) { @@ -206,56 +233,277 @@ else if (argument instanceof CallExpression) { if (!(callExpression.getArguments().get(0) instanceof ConstantExpression)) { return new ClpExpression(node); } - ConstantExpression literal = (ConstantExpression) callExpression.getArguments().get(0); - String literalString = getLiteralString(literal); - return new ClpExpression(variableName + ": \"" + literalString.replace("%", "*").replace("_", "?") + "\""); + pattern = getLiteralString((ConstantExpression) callExpression.getArguments().get(0)); } - return new ClpExpression(node); + else { + return new ClpExpression(node); + } + pattern = pattern.replace("%", "*").replace("_", "?"); + return new ClpExpression(String.format("%s: \"%s\"", variableName, pattern)); } - private ClpExpression handleLogicalBinary(String operator, CallExpression node) + private static class SubstrInfo { - if (node.getArguments().size() != 2) { - throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, - 
"Logical binary operator must have exactly two arguments. Received: " + node); + String variableName; + RowExpression startExpression; + RowExpression lengthExpression; + SubstrInfo(String variableName, RowExpression start, RowExpression length) + { + this.variableName = variableName; + this.startExpression = start; + this.lengthExpression = length; } + } - if (!(node.getArguments().get(0) instanceof VariableReferenceExpression) || - !(node.getArguments().get(1) instanceof ConstantExpression)) { - return new ClpExpression(node); + /** + * Parse SUBSTR(...) calls that appear either as: + * SUBSTR(x, start) + * or + * SUBSTR(x, start, length) + */ + private Optional parseSubstringCall(CallExpression callExpression) + { + FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(callExpression.getFunctionHandle()); + String functionName = functionMetadata.getName().getObjectName(); + if (!functionName.equals("substr")) { + return Optional.empty(); } - ClpExpression leftExpression = node.getArguments().get(0).accept(this, null); - ClpExpression rightExpression = node.getArguments().get(1).accept(this, null); - if (!leftExpression.getDefinition().isPresent() || !rightExpression.getDefinition().isPresent()) { - return new ClpExpression(node); + int argCount = callExpression.getArguments().size(); + if (argCount < 2 || argCount > 3) { + return Optional.empty(); + } + + RowExpression arg0 = callExpression.getArguments().get(0); + if (!(arg0 instanceof VariableReferenceExpression)) { + return Optional.empty(); + } + + String varName = getVariableName((VariableReferenceExpression) arg0); + RowExpression startExpression = callExpression.getArguments().get(1); + RowExpression lengthExpression = null; + if (argCount == 3) { + lengthExpression = callExpression.getArguments().get(2); + } + + return Optional.of(new SubstrInfo(varName, startExpression, lengthExpression)); + } + + /** + * Attempt to parse "start" or "length" as an integer. 
+ */ + private Optional parseIntValue(RowExpression expression) + { + if (expression instanceof ConstantExpression) { + try { + return Optional.of(Integer.parseInt(getLiteralString((ConstantExpression) expression))); + } + catch (NumberFormatException ignored) { } + } + else if (expression instanceof CallExpression) { + CallExpression call = (CallExpression) expression; + FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(call.getFunctionHandle()); + Optional operatorTypeOptional = functionMetadata.getOperatorType(); + if (operatorTypeOptional.isPresent() && operatorTypeOptional.get().equals(OperatorType.NEGATION)) { + RowExpression arg0 = call.getArguments().get(0); + if (arg0 instanceof ConstantExpression) { + try { + return Optional.of(-Integer.parseInt(getLiteralString((ConstantExpression) arg0))); + } + catch (NumberFormatException ignored) { } + } + } + } + return Optional.empty(); + } + + /** + * If lengthExpression is a constant integer or LENGTH('someString') that matches targetString.length(), + * return that length. Otherwise empty. 
+ */ + private Optional parseLengthLiteralOrFunction(RowExpression lengthExpression, String targetString) + { + // 1) If it’s a constant, just compare to targetString.length() + if (lengthExpression instanceof ConstantExpression) { + String val = getLiteralString((ConstantExpression) lengthExpression); + try { + int parsed = Integer.parseInt(val); + if (parsed == targetString.length()) { + return Optional.of(parsed); + } + } + catch (NumberFormatException ignored) { } + return Optional.empty(); + } + // 2) If it’s a function call, see if it’s LENGTH('xyz') that matches + if (lengthExpression instanceof CallExpression) { + CallExpression call = (CallExpression) lengthExpression; + FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(call.getFunctionHandle()); + String functionName = functionMetadata.getName().getObjectName(); + if (functionName.equals("length") && call.getArguments().size() == 1) { + RowExpression arg0 = call.getArguments().get(0); + if (arg0 instanceof ConstantExpression) { + String inside = getLiteralString((ConstantExpression) arg0); + if (inside.equals(targetString)) { + return Optional.of(targetString.length()); + } + } + } + } + return Optional.empty(); + } + + /** + * Translate SUBSTR(x, start) or SUBSTR(x, start, length) = 'someString' to KQL. 
+ */ + private ClpExpression interpretSubstringEquality(SubstrInfo info, String targetString) + { + if (info.lengthExpression != null) { + Optional maybeStart = parseIntValue(info.startExpression); + Optional maybeLen = parseLengthLiteralOrFunction(info.lengthExpression, targetString); + + if (maybeStart.isPresent() && maybeLen.isPresent()) { + int start = maybeStart.get(); + int len = maybeLen.get(); + if (len == targetString.length()) { + StringBuilder result = new StringBuilder(); + result.append(info.variableName).append(": \""); + for (int i = 1; i < start; i++) { + result.append("?"); + } + result.append(targetString).append("*\""); + return new ClpExpression(result.toString()); + } + } + } + else { + Optional maybeStart = parseIntValue(info.startExpression); + if (maybeStart.isPresent()) { + int start = maybeStart.get(); + if (start > 0) { + StringBuilder result = new StringBuilder(); + result.append(info.variableName).append(": \""); + for (int i = 1; i < start; i++) { + result.append("?"); + } + result.append(targetString).append("\""); + return new ClpExpression(result.toString()); + } + if (start == -targetString.length()) { + return new ClpExpression(String.format("%s: \"*%s\"", info.variableName, targetString)); + } + } + } + + return new ClpExpression(Optional.empty(), Optional.empty()); + } + + private ClpExpression tryInterpretSubstringEquality( + OperatorType operator, + RowExpression possibleSubstring, + RowExpression possibleLiteral) + { + if (!operator.equals(OperatorType.EQUAL)) { + return new ClpExpression(); + } + + if (!(possibleSubstring instanceof CallExpression) || + !(possibleLiteral instanceof ConstantExpression)) { + return new ClpExpression(); } - String variableName = leftExpression.getDefinition().get(); - String literalString = rightExpression.getDefinition().get(); - Type literalType = node.getArguments().get(1).getType(); - if (operator.equals("=")) { + Optional maybeSubstringCall = parseSubstringCall((CallExpression) 
possibleSubstring); + if (!maybeSubstringCall.isPresent()) { + return new ClpExpression(); + } + + String targetString = getLiteralString((ConstantExpression) possibleLiteral); + return interpretSubstringEquality(maybeSubstringCall.get(), targetString); + } + + private ClpExpression buildClpExpression( + String variableName, + String literalString, + OperatorType operator, + Type literalType, + RowExpression originalNode) + { + if (operator.equals(OperatorType.EQUAL)) { if (literalType instanceof VarcharType) { - return new ClpExpression(variableName + ": \"" + literalString + "\""); + return new ClpExpression(String.format("%s: \"%s\"", variableName, literalString)); } else { - return new ClpExpression(variableName + ": " + literalString); + return new ClpExpression(String.format("%s: %s", variableName, literalString)); } } - else if (operator.equals("<>")) { + else if (operator.equals(OperatorType.NOT_EQUAL)) { if (literalType instanceof VarcharType) { - return new ClpExpression("NOT " + variableName + ": \"" + literalString + "\""); + return new ClpExpression(String.format("NOT %s: \"%s\"", variableName, literalString)); } else { - return new ClpExpression("NOT " + variableName + ": " + literalString); + return new ClpExpression(String.format("NOT %s: %s", variableName, literalString)); } } else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { - return new ClpExpression(variableName + " " + operator + " " + literalString); + return new ClpExpression(String.format("%s %s %s", variableName, operator.getOperator(), literalString)); } - else { + return new ClpExpression(originalNode); + } + + private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression node) + { + if (node.getArguments().size() != 2) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Logical binary operator must have exactly two arguments. 
Received: " + node); + } + RowExpression left = node.getArguments().get(0); + RowExpression right = node.getArguments().get(1); + + ClpExpression maybeLeftSubstring = tryInterpretSubstringEquality(operator, left, right); + if (maybeLeftSubstring.getDefinition().isPresent()) { + return maybeLeftSubstring; + } + + ClpExpression maybeRightSubstring = tryInterpretSubstringEquality(operator, right, left); + if (maybeRightSubstring.getDefinition().isPresent()) { + return maybeRightSubstring; + } + + ClpExpression leftExpression = left.accept(this, null); + ClpExpression rightExpression = right.accept(this, null); + Optional leftDefinition = leftExpression.getDefinition(); + Optional rightDefinition = rightExpression.getDefinition(); + if (!leftDefinition.isPresent() || !rightDefinition.isPresent()) { return new ClpExpression(node); } + + boolean leftIsVariable = (left instanceof VariableReferenceExpression); + boolean rightIsVariable = (right instanceof VariableReferenceExpression); + boolean leftIsConstant = (left instanceof ConstantExpression); + boolean rightIsConstant = (right instanceof ConstantExpression); + + Type leftType = left.getType(); + Type rightType = right.getType(); + + if (leftIsVariable && rightIsConstant) { + return buildClpExpression( + leftDefinition.get(), // variable + rightDefinition.get(), // literal + operator, + rightType, + node); + } + else if (leftIsConstant && rightIsVariable) { + OperatorType newOperator = OperatorType.flip(operator); + return buildClpExpression( + rightDefinition.get(), // variable + leftDefinition.get(), // literal + newOperator, + leftType, + node); + } + // fallback + return new ClpExpression(node); } @Override @@ -274,8 +522,8 @@ public ClpExpression visitCall(CallExpression node, Void context) Optional operatorTypeOptional = functionMetadata.getOperatorType(); if (operatorTypeOptional.isPresent()) { OperatorType operatorType = operatorTypeOptional.get(); - if (operatorType.isComparisonOperator()) { - return 
handleLogicalBinary(operatorType.getOperator(), node); + if (operatorType.isComparisonOperator() || operatorType != OperatorType.IS_DISTINCT_FROM) { + return handleLogicalBinary(operatorType, node); } } @@ -304,8 +552,17 @@ public ClpExpression visitSpecialForm(SpecialFormExpression node, Void context) return handleOr(node); case IN: return handleIn(node); + case IS_NULL: + return handleIsNull(node); default: return new ClpExpression(node); } } + + // For all other expressions, return the original expression + @Override + public ClpExpression visitExpression(RowExpression node, Void context) + { + return new ClpExpression(node); + } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index a7a3f2749041e..62496cae00169 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -56,14 +56,39 @@ public void testStringMatchPushdown() { SessionHolder sessionHolder = new SessionHolder(); + // Exact match testFilter("city = 'hello world'", Optional.of("city: \"hello world\""), Optional.empty(), sessionHolder); - testFilter("city != 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("'hello world' = city", Optional.of("city: \"hello world\""), Optional.empty(), sessionHolder); + + // Like predicates that are transformed into substring match testFilter("city like 'hello%'", Optional.of("city: \"hello*\""), Optional.empty(), sessionHolder); - testFilter("city not like 'hello%'", Optional.of("NOT city: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("city like '%hello'", Optional.of("city: \"*hello\""), Optional.empty(), sessionHolder); + + // Like predicates that are transformed into CARDINALITY(SPLIT(x, 'some string', 2)) = 2 form, and they are not pushed down for now + testFilter("city like '%hello%'", 
Optional.empty(), Optional.of("city like '%hello%'"), sessionHolder); + + // Like predicates that are kept in the original forms testFilter("city like 'hello_'", Optional.of("city: \"hello?\""), Optional.empty(), sessionHolder); - testFilter("city not like 'hello_'", Optional.of("NOT city: \"hello?\""), Optional.empty(), sessionHolder); + testFilter("city like '_hello'", Optional.of("city: \"?hello\""), Optional.empty(), sessionHolder); testFilter("city like 'hello_w%'", Optional.of("city: \"hello?w*\""), Optional.empty(), sessionHolder); - testFilter("city not like 'hello_w%'", Optional.of("NOT city: \"hello?w*\""), Optional.empty(), sessionHolder); + testFilter("city like '%hello_w'", Optional.of("city: \"*hello?w\""), Optional.empty(), sessionHolder); + testFilter("city like 'hello%world'", Optional.of("city: \"hello*world\""), Optional.empty(), sessionHolder); + testFilter("city like 'hello%wor%ld'", Optional.of("city: \"hello*wor*ld\""), Optional.empty(), sessionHolder); + } + + @Test + public void testSubStringPushdown() + { + SessionHolder sessionHolder = new SessionHolder(); + + testFilter("substr(city, 1, 2) = 'he'", Optional.of("city: \"he*\""), Optional.empty(), sessionHolder); + testFilter("substr(city, 5, 2) = 'he'", Optional.of("city: \"????he*\""), Optional.empty(), sessionHolder); + testFilter("substr(city, 5) = 'he'", Optional.of("city: \"????he\""), Optional.empty(), sessionHolder); + testFilter("substr(city, -2) = 'he'", Optional.of("city: \"*he\""), Optional.empty(), sessionHolder); + + // Invalid substring index is not pushed down + testFilter("substr(city, 1, 5) = 'he'", Optional.empty(), Optional.of("substr(city, 1, 5) = 'he'"), sessionHolder); + testFilter("substr(city, -5) = 'he'", Optional.empty(), Optional.of("substr(city, -5) = 'he'"), sessionHolder); } @Test @@ -77,6 +102,14 @@ public void testNumericComparisonPushdown() testFilter("fare <= 0", Optional.of("fare <= 0"), Optional.empty(), sessionHolder); testFilter("fare = 0", 
Optional.of("fare: 0"), Optional.empty(), sessionHolder); testFilter("fare != 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); + testFilter("fare <> 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); + testFilter("0 < fare", Optional.of("fare > 0"), Optional.empty(), sessionHolder); + testFilter("0 <= fare", Optional.of("fare >= 0"), Optional.empty(), sessionHolder); + testFilter("0 > fare", Optional.of("fare < 0"), Optional.empty(), sessionHolder); + testFilter("0 >= fare", Optional.of("fare <= 0"), Optional.empty(), sessionHolder); + testFilter("0 = fare", Optional.of("fare: 0"), Optional.empty(), sessionHolder); + testFilter("0 != fare", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); + testFilter("0 <> fare", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); } @Test @@ -88,6 +121,16 @@ public void testOrPushdown() sessionHolder); testFilter("lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1", Optional.empty(), Optional.of("(lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1)"), sessionHolder); + + // Multiple ORs + testFilter("fare > 0 OR city like 'b%' OR lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1", + Optional.empty(), + Optional.of("fare > 0 OR city like 'b%' OR lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1"), + sessionHolder); + testFilter("fare > 0 OR city like 'b%' OR \"region.Id\" != 1", + Optional.of("((fare > 0 OR city: \"b*\") OR NOT region.Id: 1)"), + Optional.empty(), + sessionHolder); } @Test @@ -98,6 +141,16 @@ public void testAndPushdown() testFilter("fare > 0 AND city like 'b%'", Optional.of("(fare > 0 AND city: \"b*\")"), Optional.empty(), sessionHolder); testFilter("lower(\"region.Name\") = 'hello world' AND \"region.Id\" != 1", Optional.of("(NOT region.Id: 1)"), Optional.of("lower(\"region.Name\") = 'hello world'"), sessionHolder); + + // Multiple ANDs + testFilter("fare > 0 AND city like 'b%' AND lower(\"region.Name\") = 'hello 
world' AND \"region.Id\" != 1", + Optional.of("(((fare > 0 AND city: \"b*\")) AND NOT region.Id: 1)"), + Optional.of("(lower(\"region.Name\") = 'hello world')"), + sessionHolder); + testFilter("fare > 0 AND city like '%b%' AND lower(\"region.Name\") = 'hello world' AND \"region.Id\" != 1", + Optional.of("(((fare > 0)) AND NOT region.Id: 1)"), + Optional.of("city like '%b%' AND lower(\"region.Name\") = 'hello world'"), + sessionHolder); } @Test @@ -113,6 +166,10 @@ public void testNotPushdown() testFilter("fare != 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); testFilter("fare <> 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); testFilter("NOT (fare = 0)", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); + + // Multiple NOTs + testFilter("NOT (NOT fare = 0)", Optional.of("NOT NOT fare: 0"), Optional.empty(), sessionHolder); + testFilter("NOT (fare = 0 AND city = 'hello world')", Optional.of("NOT (fare: 0 AND city: \"hello world\")"), Optional.empty(), sessionHolder); } @Test @@ -123,6 +180,15 @@ public void testInPushdown() testFilter("city IN ('hello world', 'hello world 2')", Optional.of("(city: \"hello world\" OR city: \"hello world 2\")"), Optional.empty(), sessionHolder); } + @Test + public void testIsNullPushdown() + { + SessionHolder sessionHolder = new SessionHolder(); + + testFilter("city IS NULL", Optional.of("NOT city: *"), Optional.empty(), sessionHolder); + testFilter("city IS NOT NULL", Optional.of("NOT NOT city: *"), Optional.empty(), sessionHolder); + } + @Test public void testComplexPushdown() { @@ -130,5 +196,10 @@ public void testComplexPushdown() Optional.of("((fare > 0 OR city: \"b*\"))"), Optional.of("(lower(\"region.Name\") = 'hello world' OR city IS NULL)"), new SessionHolder()); + // complex cases with and, or and not + testFilter("\"region.Id\" = 1 AND (fare > 0 OR city not like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", + Optional.of("((region.Id: 1 AND (fare > 0 OR 
NOT city: \"b*\")))"), + Optional.of("lower(\"region.Name\") = 'hello world' OR city IS NULL"), + new SessionHolder()); } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java b/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java index 3c36739dd9f26..c33430616a779 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/relational/SqlToRowExpressionTranslator.java @@ -914,10 +914,10 @@ protected RowExpression visitLikePredicate(LikePredicate node, Context context) return likeFunctionCall(value, call(getSourceLocation(node), "LIKE_PATTERN", functionResolution.likePatternFunction(), LIKE_PATTERN, pattern, escape)); } -// RowExpression prefixOrSuffixMatch = generateLikePrefixOrSuffixMatch(value, pattern); -// if (prefixOrSuffixMatch != null) { -// return prefixOrSuffixMatch; -// } + RowExpression prefixOrSuffixMatch = generateLikePrefixOrSuffixMatch(value, pattern); + if (prefixOrSuffixMatch != null) { + return prefixOrSuffixMatch; + } if (!functionResolution.supportsLikePatternFunction()) { return likeFunctionCall(value, pattern); From 4a6b4ff2a9b02a6b86ea9dd5593deeb3fee21f0a Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 21 Mar 2025 03:07:23 +0000 Subject: [PATCH 100/126] clean up the code --- .../presto/ClpFilterToKqlConverter.java | 21 ++----------------- .../yscope/presto/TestClpPlanOptimizer.java | 7 ++++--- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 3035e3adcdb22..fa70355701407 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -318,12 +318,11 @@ else if (expression 
instanceof CallExpression) { } /** - * If lengthExpression is a constant integer or LENGTH('someString') that matches targetString.length(), + * If lengthExpression is a constant integer that matches targetString.length(), * return that length. Otherwise empty. */ private Optional parseLengthLiteralOrFunction(RowExpression lengthExpression, String targetString) { - // 1) If it’s a constant, just compare to targetString.length() if (lengthExpression instanceof ConstantExpression) { String val = getLiteralString((ConstantExpression) lengthExpression); try { @@ -333,22 +332,6 @@ private Optional parseLengthLiteralOrFunction(RowExpression lengthExpre } } catch (NumberFormatException ignored) { } - return Optional.empty(); - } - // 2) If it’s a function call, see if it’s LENGTH('xyz') that matches - if (lengthExpression instanceof CallExpression) { - CallExpression call = (CallExpression) lengthExpression; - FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(call.getFunctionHandle()); - String functionName = functionMetadata.getName().getObjectName(); - if (functionName.equals("length") && call.getArguments().size() == 1) { - RowExpression arg0 = call.getArguments().get(0); - if (arg0 instanceof ConstantExpression) { - String inside = getLiteralString((ConstantExpression) arg0); - if (inside.equals(targetString)) { - return Optional.of(targetString.length()); - } - } - } } return Optional.empty(); } @@ -365,7 +348,7 @@ private ClpExpression interpretSubstringEquality(SubstrInfo info, String targetS if (maybeStart.isPresent() && maybeLen.isPresent()) { int start = maybeStart.get(); int len = maybeLen.get(); - if (len == targetString.length()) { + if (start > 0 && len == targetString.length()) { StringBuilder result = new StringBuilder(); result.append(info.variableName).append(": \""); for (int i = 1; i < start; i++) { diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java 
b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index 62496cae00169..3f64532ee14e8 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -192,14 +192,15 @@ public void testIsNullPushdown() @Test public void testComplexPushdown() { + SessionHolder sessionHolder = new SessionHolder(); + testFilter("(fare > 0 OR city like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", Optional.of("((fare > 0 OR city: \"b*\"))"), Optional.of("(lower(\"region.Name\") = 'hello world' OR city IS NULL)"), - new SessionHolder()); - // complex cases with and, or and not + sessionHolder); testFilter("\"region.Id\" = 1 AND (fare > 0 OR city not like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", Optional.of("((region.Id: 1 AND (fare > 0 OR NOT city: \"b*\")))"), Optional.of("lower(\"region.Name\") = 'hello world' OR city IS NULL"), - new SessionHolder()); + sessionHolder); } } From e4b558eea6a94744aa88ce9b6474b04b800605e9 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Mar 2025 02:27:56 +0000 Subject: [PATCH 101/126] add clp to presto-dpcs and clean up dependencies --- presto-docs/src/main/sphinx/connector.rst | 1 + presto-docs/src/main/sphinx/connector/clp.rst | 183 ++++++++++++++++++ .../etc/catalog/hive.properties | 5 + presto-native-execution/etc/config.properties | 8 +- presto-native-execution/etc/node.properties | 4 +- 5 files changed, 194 insertions(+), 7 deletions(-) create mode 100644 presto-docs/src/main/sphinx/connector/clp.rst diff --git a/presto-docs/src/main/sphinx/connector.rst b/presto-docs/src/main/sphinx/connector.rst index d337fe4ed12d1..a45ab8f9bafa4 100644 --- a/presto-docs/src/main/sphinx/connector.rst +++ b/presto-docs/src/main/sphinx/connector.rst @@ -14,6 +14,7 @@ from different data sources. 
connector/blackhole connector/cassandra connector/clickhouse + connector/clp connector/deltalake connector/druid connector/elasticsearch diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst new file mode 100644 index 0000000000000..bb77b064b3abd --- /dev/null +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -0,0 +1,183 @@ +======================= +CLP Connector +======================= + +.. contents:: + :local: + :backlinks: none + :depth: 1 + +Overview +-------- + +The CLP Connector enables SQL-based querying of CLP-S archives from Presto. This document describes how to set up the +CLP Connector to run SQL queries. + + +Configuration +------------- + +To configure the CLP connector, create a catalog properties file +``etc/catalog/clp.properties`` with the following contents, +replacing the properties as appropriate: + +.. code-block:: none + + connector.name=clp + clp.archive-source=local + clp.metadata-source=mysql + clp.metadata-db-url=jdbc:mysql://localhost:3306 + clp.metadata-db-name=clp_db + clp.metadata-db-user=clp_user + clp.metadata-db-password=clp_password + clp.metadata-table-prefix=clp_ + clp.split-source=mysql + + +Configuration Properties +------------------------ + +The following configuration properties are available: + +============================================= ============================================================================== +Property Name Description +============================================= ============================================================================== +``clp.archive-source`` The source of the CLP archive. +``clp.metadata-expire-interval`` The time interval after which metadata entries are considered expired. +``clp.metadata-refresh-interval`` The frequency at which metadata is refreshed from the source. +``clp.polymorphic-type-enabled`` Enables or disables support for polymorphic types within CLP. 
+``clp.metadata-source`` The source from which metadata is fetched. +``clp.metadata-db-url`` The connection URL for the metadata database. +``clp.metadata-db-name`` The name of the metadata database. +``clp.metadata-db-user`` The database user with access to the metadata database. +``clp.metadata-db-password`` The password for the metadata database user. +``clp.metadata-table-prefix`` A prefix applied to table names in the metadata database. +``clp.split-source`` The source of split information for query execution. +============================================= ============================================================================== + +``clp.archive-source`` +^^^^^^^^^^^^^^^^^^^^^^ + +Specifies the source of the CLP archive. Supported values include ``local`` (local storage) and ``s3`` (Amazon S3). + +This property is optional. The default is ``local``. + +``clp.metadata-expire-interval`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Defines how long metadata entries remain valid before being considered expired, in seconds. + +This property is optional. The default is ``600``. + +``clp.metadata-refresh-interval`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Specifies how frequently metadata is refreshed from the source, in seconds. This ensures that metadata remains up to +date. + +Set this to a lower value for frequently changing datasets or to a higher value to reduce load. + +This property is optional. The default is ``60``. + +``clp.polymorphic-type-enabled`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Enables or disables support for polymorphic types in CLP, allowing the same field to have different types. +This is useful for schema-less, semi-structured data where the same field may appear with different types. +When enabled, type annotations are added to conflicting field names to distinguish between types. For example, if the ``id`` +column appears as both ``int`` and ``string`` types, the connector will create two columns named ``id_bigint`` and +``id_varchar``. 
+ +Supported type annotations include ``bigint``, ``varchar``, ``double``, ``boolean``, and +``array(varchar)`` (See `Data Types`_ for details). For columns with only one type, the original column name is used. + +This property is optional. The default is ``false``. + +``clp.metadata-source`` +^^^^^^^^^^^^^^^^^^^^^^^ +Currently, the only supported source is a MySQL database, which is also used by the CLP package to store metadata. +Additional sources can be supported by implementing the ``ClpMetadataProvider`` interface. + +This property is optional. The default is ``mysql``. + +``clp.metadata-db-url`` +^^^^^^^^^^^^^^^^^^^^^^^ +The JDBC URL used to connect to the metadata database. + +This property is required if ``clp.metadata-source`` is set to ``mysql``. + +``clp.metadata-db-name`` +^^^^^^^^^^^^^^^^^^^^^^^^ + +The name of the metadata database. + +This option is required if ``clp.metadata-source`` is set to ``mysql`` and the database name is not specified in the URL. + +``clp.metadata-db-user`` +^^^^^^^^^^^^^^^^^^^^^^^^ + +The username used to authenticate with the metadata database. + +Ensure this user has read access to the relevant metadata tables. + +This option is required if ``clp.metadata-source`` is set to ``mysql``. + +``clp.metadata-db-password`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The password for the user specified in ``clp.metadata-db-user``. + +This option is required if ``clp.metadata-source`` is set to ``mysql``. + +``clp.metadata-table-prefix`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A string prefix prepended to all metadata table names when querying the database. Useful for namespacing or avoiding +collisions. + +This option is optional. The default is empty. + +``clp.split-source`` +^^^^^^^^^^^^^^^^^^^^ + +Specifies the source of split information for tables. By default, it uses the same source as the metadata with the same +connection parameters. Additional sources can be supported by implementing the ``ClpSplitProvider`` interface. 
+ +This property is optional. The default is ``mysql``. + +Metadata and Split Providers +---------------------------- +As mentioned earlier, the CLP connector relies on metadata and split providers to retrieve information from various +sources. By default, it uses a MySQL database for both metadata and split storage. We recommend using the CLP package +for log ingestion, which automatically populates the database with the required information. However, if you prefer to +use a different source—or the same source with a custom implementation—you can provide your own implementations of +the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector accordingly. + +Data Types +---------- + +The data type mappings are as follows: + +====================== ==================== +CLP Type Presto Type +====================== ==================== +``Integer`` ``BIGINT`` +``Float`` ``DOUBLE`` +``ClpString`` ``VARCHAR`` +``VarString`` ``VARCHAR`` +``DateString`` ``VARCHAR`` +``Boolean`` ``BOOLEAN`` +``UnstructuredArray`` ``ARRAY(VARCHAR)`` +(others) (unsupported) +====================== ==================== + + +Array Types +^^^^^^^^^^^ + +CLP supports two array types: ``UnstructuredArray`` and ``StructuredArray``. Unstructured arrays are stored as strings +in CLP and elements can be any type. However, in Presto, arrays are homogeneous, so the elements are converted to strings +when read. The ``StructuredArray`` type is not supported yet. + +SQL support +----------- + +The connector only provides read access to data. It does not support DDL operations, such as creating or dropping +tables. Currently, we only support one ``default`` schema. 
diff --git a/presto-native-execution/etc/catalog/hive.properties b/presto-native-execution/etc/catalog/hive.properties index ee8abe93af853..8eafe3bef1d85 100644 --- a/presto-native-execution/etc/catalog/hive.properties +++ b/presto-native-execution/etc/catalog/hive.properties @@ -1,5 +1,10 @@ connector.name=hive-hadoop2 hive.metastore=file +<<<<<<< Updated upstream hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog hive.parquet.use-column-names=true file-column-names-read-as-lower-case=true +======= +hive.metastore.catalog.dir=file:///home/user/presto/hive_catalog +hive.compression-codec=zstd +>>>>>>> Stashed changes diff --git a/presto-native-execution/etc/config.properties b/presto-native-execution/etc/config.properties index 9a9c35d0cdc7a..b29e663f30ebf 100644 --- a/presto-native-execution/etc/config.properties +++ b/presto-native-execution/etc/config.properties @@ -1,6 +1,6 @@ -discovery.uri=http://127.0.0.1:8080 -presto.version=0.289-SNAPSHOT-c8a1099 +discovery.uri=http://127.0.0.1:58215 +presto.version=testversion http-server.http.port=7777 shutdown-onset-sec=1 -register-test-functions=false -runtime-metrics-collection-enabled=false +register-test-functions=true +runtime-metrics-collection-enabled=true diff --git a/presto-native-execution/etc/node.properties b/presto-native-execution/etc/node.properties index 180bdb2e68e0a..1d92b7ace8087 100644 --- a/presto-native-execution/etc/node.properties +++ b/presto-native-execution/etc/node.properties @@ -1,5 +1,3 @@ -node.environment=production +node.environment=testing node.internal-address=127.0.0.1 node.location=testing-location -node.id=worker -node.data-dir=/root/presto/presto-native-execution/data_velox_worker \ No newline at end of file From a45341bb2179b8f1283178a550e1f97d6b7b7878 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Mar 2025 02:29:24 +0000 Subject: [PATCH 102/126] remove config examples --- etc_coordinator_s3/catalog/clp.properties | 11 ----------- 
etc_coordinator_s3/config.properties | 15 --------------- etc_coordinator_s3/jvm.config | 10 ---------- etc_coordinator_s3/log.properties | 1 - etc_coordinator_s3/node.properties | 2 -- .../etc_coordinator/catalog/clp.properties | 4 ---- .../etc_coordinator/catalog/hive.properties | 4 ---- .../etc_coordinator/catalog/postgresql.properties | 3 --- .../etc_coordinator/config.properties | 15 --------------- .../etc_coordinator/jvm.config | 10 ---------- .../etc_coordinator/log.properties | 1 - .../etc_coordinator/node.properties | 3 --- .../etc_worker/catalog/clp.properties | 11 ----------- .../etc_worker/catalog/hive.properties | 5 ----- .../etc_worker/catalog/iceberg.properties | 1 - .../etc_worker/catalog/tpchstandard.properties | 1 - .../etc_worker/config.properties | 6 ------ .../etc_worker/node.properties | 4 ---- .../etc_worker/velox.properties | 1 - 19 files changed, 108 deletions(-) delete mode 100644 etc_coordinator_s3/catalog/clp.properties delete mode 100644 etc_coordinator_s3/config.properties delete mode 100644 etc_coordinator_s3/jvm.config delete mode 100644 etc_coordinator_s3/log.properties delete mode 100644 etc_coordinator_s3/node.properties delete mode 100644 presto-native-execution/etc_coordinator/catalog/clp.properties delete mode 100644 presto-native-execution/etc_coordinator/catalog/hive.properties delete mode 100644 presto-native-execution/etc_coordinator/catalog/postgresql.properties delete mode 100644 presto-native-execution/etc_coordinator/config.properties delete mode 100644 presto-native-execution/etc_coordinator/jvm.config delete mode 100644 presto-native-execution/etc_coordinator/log.properties delete mode 100644 presto-native-execution/etc_coordinator/node.properties delete mode 100644 presto-native-execution/etc_worker/catalog/clp.properties delete mode 100644 presto-native-execution/etc_worker/catalog/hive.properties delete mode 100644 presto-native-execution/etc_worker/catalog/iceberg.properties delete mode 100644 
presto-native-execution/etc_worker/catalog/tpchstandard.properties delete mode 100644 presto-native-execution/etc_worker/config.properties delete mode 100644 presto-native-execution/etc_worker/node.properties delete mode 100644 presto-native-execution/etc_worker/velox.properties diff --git a/etc_coordinator_s3/catalog/clp.properties b/etc_coordinator_s3/catalog/clp.properties deleted file mode 100644 index 86439329be564..0000000000000 --- a/etc_coordinator_s3/catalog/clp.properties +++ /dev/null @@ -1,11 +0,0 @@ -connector.name=clp -clp.metadata-db-host=localhost -clp.metadata-db-port=3306 -clp.metadata-db-user=clp-user -clp.metadata-db-password=password -clp.metadata-db-name=clp-db -clp.metadata-table-prefix=clp_ -clp.input-source=s3 -clp.s3-bucket=https://example.s3.us-east-1.amazonaws.com -clp.s3-key-prefix=test/ -clp.polymorphic-type-enabled=true \ No newline at end of file diff --git a/etc_coordinator_s3/config.properties b/etc_coordinator_s3/config.properties deleted file mode 100644 index a90dca3192543..0000000000000 --- a/etc_coordinator_s3/config.properties +++ /dev/null @@ -1,15 +0,0 @@ -coordinator=true -node-scheduler.include-coordinator=false -http-server.http.port=8080 -query.max-memory=1GB -query.max-memory-per-node=1GB -discovery-server.enabled=true -discovery.uri=http://localhost:8080 -#task.max-worker-threads=1 -#task.concurrency=1 -experimental.internal-communication.thrift-transport-enabled=true -optimizer.optimize-hash-generation=false -regex-library=RE2J -use-alternative-function-signatures=true -inline-sql-functions=false -nested-data-serialization-enabled=false \ No newline at end of file diff --git a/etc_coordinator_s3/jvm.config b/etc_coordinator_s3/jvm.config deleted file mode 100644 index 1821127484229..0000000000000 --- a/etc_coordinator_s3/jvm.config +++ /dev/null @@ -1,10 +0,0 @@ --server --agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5006 --Xmx4G --XX:+UseG1GC --XX:G1HeapRegionSize=32M --XX:+UseGCOverheadLimit 
--XX:+ExplicitGCInvokesConcurrent --XX:+HeapDumpOnOutOfMemoryError --XX:+ExitOnOutOfMemoryError --Djdk.attach.allowAttachSelf=true diff --git a/etc_coordinator_s3/log.properties b/etc_coordinator_s3/log.properties deleted file mode 100644 index ccde82c2d8c33..0000000000000 --- a/etc_coordinator_s3/log.properties +++ /dev/null @@ -1 +0,0 @@ -com.facebook.presto=DEBUG \ No newline at end of file diff --git a/etc_coordinator_s3/node.properties b/etc_coordinator_s3/node.properties deleted file mode 100644 index ab655ca2202ea..0000000000000 --- a/etc_coordinator_s3/node.properties +++ /dev/null @@ -1,2 +0,0 @@ -node.environment=production -node.id=coordinator \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/catalog/clp.properties b/presto-native-execution/etc_coordinator/catalog/clp.properties deleted file mode 100644 index 0feade69e2944..0000000000000 --- a/presto-native-execution/etc_coordinator/catalog/clp.properties +++ /dev/null @@ -1,4 +0,0 @@ -connector.name=clp -executable-path=/root/clp/components/core/build/clp-s -archive-dir=/root/presto/presto-native-execution/clp_archive -polymorphic-type-enabled=true \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/catalog/hive.properties b/presto-native-execution/etc_coordinator/catalog/hive.properties deleted file mode 100644 index 9109cb18ef86c..0000000000000 --- a/presto-native-execution/etc_coordinator/catalog/hive.properties +++ /dev/null @@ -1,4 +0,0 @@ -connector.name=hive-hadoop2 -hive.metastore=file -hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog -hive.parquet.use-column-names=true diff --git a/presto-native-execution/etc_coordinator/catalog/postgresql.properties b/presto-native-execution/etc_coordinator/catalog/postgresql.properties deleted file mode 100644 index ca28b9ceb038e..0000000000000 --- a/presto-native-execution/etc_coordinator/catalog/postgresql.properties +++ /dev/null @@ -1,3 +0,0 @@ 
-connector.name=postgresql -connection-url=jdbc:postgresql://localhost:5432/mydb -connection-user=root diff --git a/presto-native-execution/etc_coordinator/config.properties b/presto-native-execution/etc_coordinator/config.properties deleted file mode 100644 index 994b2cd3503f3..0000000000000 --- a/presto-native-execution/etc_coordinator/config.properties +++ /dev/null @@ -1,15 +0,0 @@ -coordinator=true -node-scheduler.include-coordinator=false -http-server.http.port=8080 -query.max-memory=4GB -query.max-memory-per-node=4GB -discovery-server.enabled=true -discovery.uri=http://localhost:8080 -task.max-worker-threads=1 -task.concurrency=1 -experimental.internal-communication.thrift-transport-enabled=true -optimizer.optimize-hash-generation=false -regex-library=RE2J -use-alternative-function-signatures=true -inline-sql-functions=false -nested-data-serialization-enabled=false \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/jvm.config b/presto-native-execution/etc_coordinator/jvm.config deleted file mode 100644 index a943e2f2cd1d9..0000000000000 --- a/presto-native-execution/etc_coordinator/jvm.config +++ /dev/null @@ -1,10 +0,0 @@ --server --agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5006 --Xmx16G --XX:+UseG1GC --XX:G1HeapRegionSize=32M --XX:+UseGCOverheadLimit --XX:+ExplicitGCInvokesConcurrent --XX:+HeapDumpOnOutOfMemoryError --XX:+ExitOnOutOfMemoryError --Djdk.attach.allowAttachSelf=true diff --git a/presto-native-execution/etc_coordinator/log.properties b/presto-native-execution/etc_coordinator/log.properties deleted file mode 100644 index 3abc29ce3d86a..0000000000000 --- a/presto-native-execution/etc_coordinator/log.properties +++ /dev/null @@ -1 +0,0 @@ -com.facebook.presto=INFO \ No newline at end of file diff --git a/presto-native-execution/etc_coordinator/node.properties b/presto-native-execution/etc_coordinator/node.properties deleted file mode 100644 index 977d0f74a5a14..0000000000000 --- 
a/presto-native-execution/etc_coordinator/node.properties +++ /dev/null @@ -1,3 +0,0 @@ -node.environment=production -node.id=coordinator -node.data-dir=/root/presto/presto-native-execution/data_coordinator \ No newline at end of file diff --git a/presto-native-execution/etc_worker/catalog/clp.properties b/presto-native-execution/etc_worker/catalog/clp.properties deleted file mode 100644 index 86439329be564..0000000000000 --- a/presto-native-execution/etc_worker/catalog/clp.properties +++ /dev/null @@ -1,11 +0,0 @@ -connector.name=clp -clp.metadata-db-host=localhost -clp.metadata-db-port=3306 -clp.metadata-db-user=clp-user -clp.metadata-db-password=password -clp.metadata-db-name=clp-db -clp.metadata-table-prefix=clp_ -clp.input-source=s3 -clp.s3-bucket=https://example.s3.us-east-1.amazonaws.com -clp.s3-key-prefix=test/ -clp.polymorphic-type-enabled=true \ No newline at end of file diff --git a/presto-native-execution/etc_worker/catalog/hive.properties b/presto-native-execution/etc_worker/catalog/hive.properties deleted file mode 100644 index ee8abe93af853..0000000000000 --- a/presto-native-execution/etc_worker/catalog/hive.properties +++ /dev/null @@ -1,5 +0,0 @@ -connector.name=hive-hadoop2 -hive.metastore=file -hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog -hive.parquet.use-column-names=true -file-column-names-read-as-lower-case=true diff --git a/presto-native-execution/etc_worker/catalog/iceberg.properties b/presto-native-execution/etc_worker/catalog/iceberg.properties deleted file mode 100644 index f3a43dcb28126..0000000000000 --- a/presto-native-execution/etc_worker/catalog/iceberg.properties +++ /dev/null @@ -1 +0,0 @@ -connector.name=iceberg diff --git a/presto-native-execution/etc_worker/catalog/tpchstandard.properties b/presto-native-execution/etc_worker/catalog/tpchstandard.properties deleted file mode 100644 index 16e833ca8f436..0000000000000 --- a/presto-native-execution/etc_worker/catalog/tpchstandard.properties 
+++ /dev/null @@ -1 +0,0 @@ -connector.name=tpch \ No newline at end of file diff --git a/presto-native-execution/etc_worker/config.properties b/presto-native-execution/etc_worker/config.properties deleted file mode 100644 index 45bf71c311bb9..0000000000000 --- a/presto-native-execution/etc_worker/config.properties +++ /dev/null @@ -1,6 +0,0 @@ -discovery.uri=http://127.0.0.1:8080 -presto.version=0.290-SNAPSHOT-18d3ea5 -http-server.http.port=7777 -shutdown-onset-sec=1 -register-test-functions=false -runtime-metrics-collection-enabled=false diff --git a/presto-native-execution/etc_worker/node.properties b/presto-native-execution/etc_worker/node.properties deleted file mode 100644 index bc1c85cbded2a..0000000000000 --- a/presto-native-execution/etc_worker/node.properties +++ /dev/null @@ -1,4 +0,0 @@ -node.environment=production -node.internal-address=127.0.0.1 -node.location=testing-location -node.id=worker-1 \ No newline at end of file diff --git a/presto-native-execution/etc_worker/velox.properties b/presto-native-execution/etc_worker/velox.properties deleted file mode 100644 index 6c2506bd99a8e..0000000000000 --- a/presto-native-execution/etc_worker/velox.properties +++ /dev/null @@ -1 +0,0 @@ -mutable-config=true \ No newline at end of file From ed0397de7b7ce03d5472d917952c6ccadbe92250 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Mar 2025 02:37:04 +0000 Subject: [PATCH 103/126] revert some files back --- .gitmodules | 5 ++--- pom.xml | 1 - presto-native-execution/etc/catalog/hive.properties | 11 +---------- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/.gitmodules b/.gitmodules index 087837ce3098f..6fb925ff13ecf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,3 @@ [submodule "presto-native-execution/velox"] - path = presto-native-execution/velox - url = https://github.com/y-scope/velox.git - branch = clp_integration_s3 + path = presto-native-execution/velox + url = https://github.com/facebookincubator/velox.git diff --git a/pom.xml 
b/pom.xml index 85a955ca461b2..ca79bb2a54b70 100644 --- a/pom.xml +++ b/pom.xml @@ -2595,7 +2595,6 @@ - org.alluxio:alluxio-shaded-client org.codehaus.plexus:plexus-utils com.google.guava:guava com.fasterxml.jackson.core:jackson-annotations diff --git a/presto-native-execution/etc/catalog/hive.properties b/presto-native-execution/etc/catalog/hive.properties index 8eafe3bef1d85..466b7e664e44f 100644 --- a/presto-native-execution/etc/catalog/hive.properties +++ b/presto-native-execution/etc/catalog/hive.properties @@ -1,10 +1 @@ -connector.name=hive-hadoop2 -hive.metastore=file -<<<<<<< Updated upstream -hive.metastore.catalog.dir=file:///root/presto/presto-native-execution/hive_catalog -hive.parquet.use-column-names=true -file-column-names-read-as-lower-case=true -======= -hive.metastore.catalog.dir=file:///home/user/presto/hive_catalog -hive.compression-codec=zstd ->>>>>>> Stashed changes +connector.name=hive From 99455170fe79af782f2f490e2bc875280fcb950e Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Mar 2025 03:07:58 +0000 Subject: [PATCH 104/126] revert presto-native-execution back --- .../presto_cpp/main/CMakeLists.txt | 3 +- .../presto_cpp/main/PrestoServer.cpp | 8 --- .../presto_cpp/main/tests/CMakeLists.txt | 2 - .../main/types/PrestoToVeloxConnector.cpp | 57 ------------------- .../main/types/PrestoToVeloxConnector.h | 27 --------- .../main/types/tests/CMakeLists.txt | 3 - .../presto_cpp/presto_protocol/Makefile | 9 --- .../presto_protocol/presto_protocol.cpp | 1 - .../presto_protocol/presto_protocol.h | 1 - presto-native-execution/velox | 2 +- 10 files changed, 2 insertions(+), 111 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/CMakeLists.txt b/presto-native-execution/presto_cpp/main/CMakeLists.txt index 4db26887f9785..c06e00edf834c 100644 --- a/presto-native-execution/presto_cpp/main/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/CMakeLists.txt @@ -99,14 +99,13 @@ set_property(TARGET presto_server_lib PROPERTY 
JOB_POOL_LINK presto_link_job_pool) add_executable(presto_server PrestoMain.cpp) -target_link_options(presto_server PRIVATE "-no-pie") # Moving velox_hive_connector and velox_tpch_connector to presto_server_lib # results in multiple link errors similar to the one below only on GCC. # "undefined reference to `vtable for velox::connector::tpch::TpchTableHandle`" # TODO: Fix these errors. target_link_libraries(presto_server presto_server_lib velox_hive_connector - velox_tpch_connector velox_clp_connector) + velox_tpch_connector) # Clang requires explicit linking with libatomic. if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 7da3a56ee7578..0473640632edf 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -48,7 +48,6 @@ #include "velox/common/memory/MmapAllocator.h" #include "velox/common/memory/SharedArbitrator.h" #include "velox/connectors/Connector.h" -#include "velox/connectors/clp/ClpConnector.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveDataSink.h" #include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" @@ -272,8 +271,6 @@ void PrestoServer::run() { std::make_unique("iceberg")); registerPrestoToVeloxConnector( std::make_unique("tpch")); - registerPrestoToVeloxConnector( - std::make_unique("clp")); // Presto server uses system catalog or system schema in other catalogs // in different places in the code. All these resolve to the SystemConnector. 
// Depending on where the operator or column is used, different prefixes can @@ -1186,11 +1183,6 @@ void PrestoServer::registerConnectorFactories() { velox::connector::registerConnectorFactory( std::make_shared()); } - if (!velox::connector::hasConnectorFactory( - velox::connector::clp::ClpConnectorFactory::kClpConnectorName)) { - velox::connector::registerConnectorFactory( - std::make_shared()); - } } std::vector PrestoServer::registerConnectors( diff --git a/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt index c599af9e9b558..1643ca5e17bab 100644 --- a/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/tests/CMakeLists.txt @@ -23,7 +23,6 @@ add_executable( SessionPropertiesTest.cpp TaskManagerTest.cpp QueryContextManagerTest.cpp) -target_link_options(presto_server_test PRIVATE "-no-pie") if(DEFINED PRESTO_MEMORY_CHECKER_TYPE AND PRESTO_MEMORY_CHECKER_TYPE STREQUAL "LINUX_MEMORY_CHECKER") @@ -45,7 +44,6 @@ target_link_libraries( $ velox_hive_connector velox_tpch_connector - velox_clp_connector velox_presto_serializer velox_functions_prestosql velox_aggregates diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index c4677c38d5dbf..c525f88e35300 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -13,15 +13,11 @@ */ #include "presto_cpp/main/types/PrestoToVeloxConnector.h" -#include "presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h" #include "presto_cpp/presto_protocol/connector/hive/HiveConnectorProtocol.h" #include "presto_cpp/presto_protocol/connector/iceberg/IcebergConnectorProtocol.h" #include "presto_cpp/presto_protocol/connector/tpch/TpchConnectorProtocol.h" #include -#include 
"velox/connectors/clp/ClpColumnHandle.h" -#include "velox/connectors/clp/ClpConnectorSplit.h" -#include "velox/connectors/clp/ClpTableHandle.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/connectors/hive/HiveDataSink.h" @@ -1556,57 +1552,4 @@ std::unique_ptr TpchPrestoToVeloxConnector::createConnectorProtocol() const { return std::make_unique(); } - -std::unique_ptr -ClpPrestoToVeloxConnector::toVeloxSplit( - const protocol::ConnectorId& catalogId, - const protocol::ConnectorSplit* connectorSplit, - const protocol::SplitContext* splitContext) const { - auto clpSplit = dynamic_cast(connectorSplit); - VELOX_CHECK_NOT_NULL( - clpSplit, "Unexpected split type {}", connectorSplit->_type); - return std::make_unique( - catalogId, - clpSplit->schemaTableName.schema, - clpSplit->schemaTableName.table, - clpSplit->archivePath); -} - -std::unique_ptr -ClpPrestoToVeloxConnector::toVeloxColumnHandle( - const protocol::ColumnHandle* column, - const TypeParser& typeParser) const { - auto clpColumn = dynamic_cast(column); - VELOX_CHECK_NOT_NULL( - clpColumn, "Unexpected column handle type {}", column->_type); - return std::make_unique( - clpColumn->columnName, - typeParser.parse(clpColumn->columnType), - clpColumn->nullable); -} - -std::unique_ptr -ClpPrestoToVeloxConnector::toVeloxTableHandle( - const protocol::TableHandle& tableHandle, - const VeloxExprConverter& exprConverter, - const TypeParser& typeParser, - std::unordered_map< - std::string, - std::shared_ptr>& assignments) const { - auto clpLayout = - std::dynamic_pointer_cast( - tableHandle.connectorTableLayout); - VELOX_CHECK_NOT_NULL( - clpLayout, - "Unexpected layout type {}", - tableHandle.connectorTableLayout->_type); - return std::make_unique( - tableHandle.connectorId, clpLayout->table.schemaTableName.table, clpLayout->query); -} - -std::unique_ptr -ClpPrestoToVeloxConnector::createConnectorProtocol() const { - return std::make_unique(); -} - 
} // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h index ef3599717dbce..eb33dfb54ca1d 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.h @@ -214,31 +214,4 @@ class TpchPrestoToVeloxConnector final : public PrestoToVeloxConnector { std::unique_ptr createConnectorProtocol() const final; }; - -class ClpPrestoToVeloxConnector final : public PrestoToVeloxConnector { - public: - explicit ClpPrestoToVeloxConnector(std::string connectorName) - : PrestoToVeloxConnector(std::move(connectorName)) {} - - std::unique_ptr toVeloxSplit( - const protocol::ConnectorId& catalogId, - const protocol::ConnectorSplit* connectorSplit, - const protocol::SplitContext* splitContext) const final; - - std::unique_ptr toVeloxColumnHandle( - const protocol::ColumnHandle* column, - const TypeParser& typeParser) const final; - - std::unique_ptr toVeloxTableHandle( - const protocol::TableHandle& tableHandle, - const VeloxExprConverter& exprConverter, - const TypeParser& typeParser, - std::unordered_map< - std::string, - std::shared_ptr>& assignments) - const final; - - std::unique_ptr createConnectorProtocol() - const final; -}; } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt index 953cb9376fe0e..28f73aff40b80 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/types/tests/CMakeLists.txt @@ -26,7 +26,6 @@ target_link_libraries( velox_dwio_orc_reader velox_hive_connector velox_tpch_connector - velox_clp_connector velox_exec velox_dwio_common_exception presto_type_converter @@ -63,7 +62,6 @@ target_link_libraries( velox_functions_lib 
velox_hive_connector velox_tpch_connector - velox_clp_connector velox_hive_partition_function velox_presto_serializer velox_serialization @@ -95,7 +93,6 @@ target_link_libraries( velox_dwio_common velox_hive_connector velox_tpch_connector - velox_clp_connector GTest::gtest GTest::gtest_main) diff --git a/presto-native-execution/presto_cpp/presto_protocol/Makefile b/presto-native-execution/presto_cpp/presto_protocol/Makefile index 204778d71fa33..3ee2b4e802b81 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/Makefile +++ b/presto-native-execution/presto_cpp/presto_protocol/Makefile @@ -45,23 +45,14 @@ presto_protocol-cpp: presto_protocol-json chevron -d connector/tpch/presto_protocol_tpch.json connector/tpch/presto_protocol-json-hpp.mustache >> connector/tpch/presto_protocol_tpch.h clang-format -style=file -i connector/tpch/presto_protocol_tpch.h connector/tpch/presto_protocol_tpch.cpp - # build clp connector related structs - echo "// DO NOT EDIT : This file is generated by chevron" > connector/clp/presto_protocol_clp.cpp - chevron -d connector/clp/presto_protocol_clp.json connector/clp/presto_protocol-json-cpp.mustache >> connector/clp/presto_protocol_clp.cpp - echo "// DO NOT EDIT : This file is generated by chevron" > connector/clp/presto_protocol_clp.h - chevron -d connector/clp/presto_protocol_clp.json connector/clp/presto_protocol-json-hpp.mustache >> connector/clp/presto_protocol_clp.h - clang-format -style=file -i connector/clp/presto_protocol_clp.h connector/clp/presto_protocol_clp.cpp - presto_protocol-json: ./java-to-struct-json.py --config core/presto_protocol_core.yml core/special/*.java core/special/*.inc -j | jq . > core/presto_protocol_core.json ./java-to-struct-json.py --config connector/hive/presto_protocol_hive.yml connector/hive/special/*.inc -j | jq . > connector/hive/presto_protocol_hive.json ./java-to-struct-json.py --config connector/iceberg/presto_protocol_iceberg.yml connector/iceberg/special/*.inc -j | jq . 
> connector/iceberg/presto_protocol_iceberg.json ./java-to-struct-json.py --config connector/tpch/presto_protocol_tpch.yml connector/tpch/special/*.inc -j | jq . > connector/tpch/presto_protocol_tpch.json - ./java-to-struct-json.py --config connector/clp/presto_protocol_clp.yml connector/clp/special/*.inc -j | jq . > connector/clp/presto_protocol_clp.json presto_protocol.proto: presto_protocol-json pystache presto_protocol-protobuf.mustache core/presto_protocol_core.json > core/presto_protocol_core.proto pystache presto_protocol-protobuf.mustache connector/hive/presto_protocol_hive.json > connector/hive/presto_protocol_hive.proto pystache presto_protocol-protobuf.mustache connector/iceberg/presto_protocol_iceberg.json > connector/iceberg/presto_protocol_iceberg.proto pystache presto_protocol-protobuf.mustache connector/tpch/presto_protocol_tpch.json > connector/tpch/presto_protocol_tpch.proto - pystache presto_protocol-protobuf.mustache connector/clp/presto_protocol_clp.json > connector/clp/presto_protocol_clp.proto diff --git a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp index c21bd69857e14..c15084817a434 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp +++ b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp @@ -15,7 +15,6 @@ // DEPRECATED: This file is deprecated and will be removed in future versions. 
-#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp" #include "presto_cpp/presto_protocol/connector/hive/presto_protocol_hive.cpp" #include "presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.cpp" #include "presto_cpp/presto_protocol/connector/tpch/presto_protocol_tpch.cpp" diff --git a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h index 876567fe12307..dd94975e3760d 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h +++ b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h @@ -16,7 +16,6 @@ // DEPRECATED: This file is deprecated and will be removed in future versions. -#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" #include "presto_cpp/presto_protocol/connector/hive/presto_protocol_hive.h" #include "presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h" #include "presto_cpp/presto_protocol/connector/tpch/presto_protocol_tpch.h" diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 16932ecfc32b9..c550daba47db2 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 16932ecfc32b94734640e44d03cbb1e5361c50c6 +Subproject commit c550daba47db2a17221c8ec0144450f32bd9a56a From 57a8aa94a376213653eb81153957623452c40c66 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 25 Mar 2025 03:10:08 +0000 Subject: [PATCH 105/126] remove clp files --- .../connector/clp/ClpConnectorProtocol.h | 29 ---- .../clp/presto_protocol-json-cpp.mustache | 146 ---------------- .../clp/presto_protocol-json-hpp.mustache | 68 -------- .../connector/clp/presto_protocol_clp.cpp | 156 ------------------ .../connector/clp/presto_protocol_clp.h | 95 ----------- .../connector/clp/presto_protocol_clp.json | 114 ------------- .../connector/clp/presto_protocol_clp.yml | 39 ----- 
.../clp/special/ClpColumnHandle.hpp.inc | 33 ---- .../clp/special/ClpTransactionHandle.cpp.inc | 30 ---- .../clp/special/ClpTransactionHandle.hpp.inc | 28 ---- 10 files changed, 738 deletions(-) delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc delete mode 100644 presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h deleted file mode 100644 index 5b1e76b4606c4..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/ClpConnectorProtocol.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once -#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" -#include "presto_cpp/presto_protocol/core/ConnectorProtocol.h" - -namespace facebook::presto::protocol::clp { -using ClpConnectorProtocol = ConnectorProtocolTemplate< - ClpTableHandle, - ClpTableLayoutHandle, - ClpColumnHandle, - NotImplemented, - NotImplemented, - ClpSplit, - NotImplemented, - ClpTransactionHandle, - NotImplemented>; -} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache deleted file mode 100644 index f30beed5a875a..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-cpp.mustache +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -// presto_protocol.prolog.cpp -// - -{{#.}} -{{#comment}} -{{comment}} -{{/comment}} -{{/.}} - - -#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" -using namespace std::string_literals; - -namespace facebook::presto::protocol::clp { - -void to_json(json& j, const ClpTransactionHandle& p) { - j = json::array(); - j.push_back(p._type); - j.push_back(p.instance); -} - -void from_json(const json& j, ClpTransactionHandle& p) { - j[0].get_to(p._type); - j[1].get_to(p.instance); -} -} // namespace facebook::presto::protocol -{{#.}} -{{#cinc}} -{{&cinc}} -{{/cinc}} -{{^cinc}} -{{#struct}} -namespace facebook::presto::protocol::clp { - {{#super_class}} - {{&class_name}}::{{&class_name}}() noexcept { - _type = "{{json_key}}"; - } - {{/super_class}} - - void to_json(json& j, const {{&class_name}}& p) { - j = json::object(); - {{#super_class}} - j["@type"] = "{{&json_key}}"; - {{/super_class}} - {{#fields}} - to_json_key(j, "{{&field_name}}", p.{{field_name}}, "{{&class_name}}", "{{&field_text}}", "{{&field_name}}"); - {{/fields}} - } - - void from_json(const json& j, {{&class_name}}& p) { - {{#super_class}} - p._type = j["@type"]; - {{/super_class}} - {{#fields}} - from_json_key(j, "{{&field_name}}", p.{{field_name}}, "{{&class_name}}", "{{&field_text}}", "{{&field_name}}"); - {{/fields}} - } -} -{{/struct}} -{{#enum}} -namespace facebook::presto::protocol::clp { - //Loosly copied this here from NLOHMANN_JSON_SERIALIZE_ENUM() - - // NOLINTNEXTLINE: cppcoreguidelines-avoid-c-arrays - static const std::pair<{{&class_name}}, json> - {{&class_name}}_enum_table[] = { // NOLINT: cert-err58-cpp - {{#elements}} - { {{&class_name}}::{{&element}}, "{{&element}}" }{{^_last}},{{/_last}} - {{/elements}} - }; - void to_json(json& j, const {{&class_name}}& e) - { - static_assert(std::is_enum<{{&class_name}}>::value, "{{&class_name}} must be an enum!"); - const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table), 
- [e](const std::pair<{{&class_name}}, json>& ej_pair) -> bool - { - return ej_pair.first == e; - }); - j = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->second; - } - void from_json(const json& j, {{&class_name}}& e) - { - static_assert(std::is_enum<{{&class_name}}>::value, "{{&class_name}} must be an enum!"); - const auto* it = std::find_if(std::begin({{&class_name}}_enum_table), std::end({{&class_name}}_enum_table), - [&j](const std::pair<{{&class_name}}, json>& ej_pair) -> bool - { - return ej_pair.second == j; - }); - e = ((it != std::end({{&class_name}}_enum_table)) ? it : std::begin({{&class_name}}_enum_table))->first; - } -} -{{/enum}} -{{#abstract}} -namespace facebook::presto::protocol::clp { - void to_json(json& j, const std::shared_ptr<{{&class_name}}>& p) { - if ( p == nullptr ) { - return; - } - String type = p->_type; - - {{#subclasses}} - if ( type == "{{&key}}" ) { - j = *std::static_pointer_cast<{{&type}}>(p); - return; - } - {{/subclasses}} - - throw TypeError(type + " no abstract type {{&class_name}} {{&key}}"); - } - - void from_json(const json& j, std::shared_ptr<{{&class_name}}>& p) { - String type; - try { - type = p->getSubclassKey(j); - } catch (json::parse_error &e) { - throw ParseError(std::string(e.what()) + " {{&class_name}} {{&key}} {{&class_name}}"); - } - - {{#subclasses}} - if ( type == "{{&key}}" ) { - std::shared_ptr<{{&type}}> k = std::make_shared<{{&type}}>(); - j.get_to(*k); - p = std::static_pointer_cast<{{&class_name}}>(k); - return; - } - {{/subclasses}} - - throw TypeError(type + " no abstract type {{&class_name}} {{&key}}"); - } -} -{{/abstract}} -{{/cinc}} -{{/.}} diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache deleted file mode 100644 index f903bd681a5c2..0000000000000 --- 
a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol-json-hpp.mustache +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -{{#.}} -{{#comment}} -{{comment}} -{{/comment}} -{{/.}} - -#include -#include - -#include "presto_cpp/external/json/nlohmann/json.hpp" -#include "presto_cpp/presto_protocol/core/presto_protocol_core.h" - -namespace facebook::presto::protocol::clp { -struct ClpTransactionHandle : public ConnectorTransactionHandle { - String instance = {}; - }; -void to_json(json& j, const ClpTransactionHandle& p); - -void from_json(const json& j, ClpTransactionHandle& p); -} //namespace facebook::presto::protocol -{{#.}} -{{#hinc}} -{{&hinc}} -{{/hinc}} -{{^hinc}} -{{#struct}} -namespace facebook::presto::protocol::clp { - struct {{class_name}} {{#super_class}}: public {{super_class}}{{/super_class}}{ - {{#fields}} - {{#field_local}}{{#optional}}std::shared_ptr<{{/optional}}{{&field_text}}{{#optional}}>{{/optional}} {{&field_name}} = {};{{/field_local}} - {{/fields}} - - {{#super_class}} - {{class_name}}() noexcept; - {{/super_class}} - }; - void to_json(json& j, const {{class_name}}& p); - void from_json(const json& j, {{class_name}}& p); -} -{{/struct}} -{{#enum}} -namespace facebook::presto::protocol::clp { - enum class {{class_name}} { - {{#elements}} - {{&element}}{{^_last}},{{/_last}} - {{/elements}} - }; - extern void to_json(json& j, const {{class_name}}& e); - extern 
void from_json(const json& j, {{class_name}}& e); -} -{{/enum}} -{{/hinc}} -{{/.}} diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp deleted file mode 100644 index 72e1846cafe22..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// DO NOT EDIT : This file is generated by chevron -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -// presto_protocol.prolog.cpp -// - -// This file is generated DO NOT EDIT @generated - -#include "presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h" -using namespace std::string_literals; - -namespace facebook::presto::protocol::clp { - -void to_json(json& j, const ClpTransactionHandle& p) { - j = json::array(); - j.push_back(p._type); - j.push_back(p.instance); -} - -void from_json(const json& j, ClpTransactionHandle& p) { - j[0].get_to(p._type); - j[1].get_to(p.instance); -} -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -ClpColumnHandle::ClpColumnHandle() noexcept { - _type = "clp"; -} - -void to_json(json& j, const ClpColumnHandle& p) { - j = json::object(); - j["@type"] = "clp"; - to_json_key( - j, "columnName", p.columnName, "ClpColumnHandle", "String", "columnName"); - to_json_key( - j, - "originalColumnName", - p.originalColumnName, - "ClpColumnHandle", - "String", - "originalColumnName"); - to_json_key( - j, "columnType", p.columnType, "ClpColumnHandle", "Type", "columnType"); - to_json_key(j, "nullable", p.nullable, "ClpColumnHandle", "bool", "nullable"); -} - -void from_json(const json& j, ClpColumnHandle& p) { - p._type = j["@type"]; - from_json_key( - j, "columnName", p.columnName, "ClpColumnHandle", "String", "columnName"); - from_json_key( - j, - "originalColumnName", - p.originalColumnName, - "ClpColumnHandle", - "String", - "originalColumnName"); - from_json_key( - j, "columnType", p.columnType, "ClpColumnHandle", "Type", "columnType"); - from_json_key( - j, "nullable", p.nullable, "ClpColumnHandle", "bool", "nullable"); -} -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -ClpSplit::ClpSplit() noexcept { - _type = "clp"; -} - -void to_json(json& j, const ClpSplit& p) { - j = json::object(); - j["@type"] = "clp"; - to_json_key( - j, - "schemaTableName", - p.schemaTableName, - "ClpSplit", - "SchemaTableName", - "schemaTableName"); - 
to_json_key( - j, "archivePath", p.archivePath, "ClpSplit", "String", "archivePath"); - to_json_key(j, "query", p.query, "ClpSplit", "String", "query"); -} - -void from_json(const json& j, ClpSplit& p) { - p._type = j["@type"]; - from_json_key( - j, - "schemaTableName", - p.schemaTableName, - "ClpSplit", - "SchemaTableName", - "schemaTableName"); - from_json_key( - j, "archivePath", p.archivePath, "ClpSplit", "String", "archivePath"); - from_json_key(j, "query", p.query, "ClpSplit", "String", "query"); -} -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -ClpTableHandle::ClpTableHandle() noexcept { - _type = "clp"; -} - -void to_json(json& j, const ClpTableHandle& p) { - j = json::object(); - j["@type"] = "clp"; - to_json_key( - j, - "schemaTableName", - p.schemaTableName, - "ClpTableHandle", - "SchemaTableName", - "schemaTableName"); -} - -void from_json(const json& j, ClpTableHandle& p) { - p._type = j["@type"]; - from_json_key( - j, - "schemaTableName", - p.schemaTableName, - "ClpTableHandle", - "SchemaTableName", - "schemaTableName"); -} -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -ClpTableLayoutHandle::ClpTableLayoutHandle() noexcept { - _type = "clp"; -} - -void to_json(json& j, const ClpTableLayoutHandle& p) { - j = json::object(); - j["@type"] = "clp"; - to_json_key( - j, "table", p.table, "ClpTableLayoutHandle", "ClpTableHandle", "table"); - to_json_key(j, "query", p.query, "ClpTableLayoutHandle", "String", "query"); -} - -void from_json(const json& j, ClpTableLayoutHandle& p) { - p._type = j["@type"]; - from_json_key( - j, "table", p.table, "ClpTableLayoutHandle", "ClpTableHandle", "table"); - from_json_key(j, "query", p.query, "ClpTableLayoutHandle", "String", "query"); -} -} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h 
b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h deleted file mode 100644 index bfa29311e5641..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.h +++ /dev/null @@ -1,95 +0,0 @@ -// DO NOT EDIT : This file is generated by chevron -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -// This file is generated DO NOT EDIT @generated - -#include -#include - -#include "presto_cpp/external/json/nlohmann/json.hpp" -#include "presto_cpp/presto_protocol/core/presto_protocol_core.h" - -namespace facebook::presto::protocol::clp { -struct ClpTransactionHandle : public ConnectorTransactionHandle { - String instance = {}; -}; -void to_json(json& j, const ClpTransactionHandle& p); - -void from_json(const json& j, ClpTransactionHandle& p); -} // namespace facebook::presto::protocol::clp -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// ClpColumnHandle is special since it needs an implementation of -// operator<(). - -namespace facebook::presto::protocol::clp { -struct ClpColumnHandle : public ColumnHandle { - String columnName = {}; - String originalColumnName = {}; - Type columnType = {}; - boolean nullable = {}; - - ClpColumnHandle() noexcept; - - bool operator<(const ColumnHandle& o) const override { - return columnName < dynamic_cast(o).columnName; - } -}; -void to_json(json& j, const ClpColumnHandle& p); -void from_json(const json& j, ClpColumnHandle& p); -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -struct ClpSplit : public ConnectorSplit { - SchemaTableName schemaTableName = {}; - String archivePath = {}; - std::shared_ptr query = {}; - - ClpSplit() noexcept; -}; -void to_json(json& j, const ClpSplit& p); -void from_json(const json& j, ClpSplit& p); -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -struct ClpTableHandle : public ConnectorTableHandle { - SchemaTableName schemaTableName = {}; - - ClpTableHandle() noexcept; -}; -void to_json(json& j, const ClpTableHandle& p); -void from_json(const json& j, ClpTableHandle& p); -} // namespace facebook::presto::protocol::clp -namespace facebook::presto::protocol::clp { -struct ClpTableLayoutHandle : public ConnectorTableLayoutHandle { - ClpTableHandle table = {}; - std::shared_ptr query = {}; - - ClpTableLayoutHandle() noexcept; -}; -void to_json(json& j, const ClpTableLayoutHandle& p); -void from_json(const json& j, ClpTableLayoutHandle& p); -} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json deleted file mode 100644 index 9f22728b175e8..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.json +++ 
/dev/null @@ -1,114 +0,0 @@ -[ - { - "comment": "// This file is generated DO NOT EDIT @generated" - }, - { - "class_name": "ClpColumnHandle", - "hinc": "/*\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n// ClpColumnHandle is special since it needs an implementation of\n// operator<().\n\nnamespace facebook::presto::protocol::clp {\nstruct ClpColumnHandle : public ColumnHandle {\n String columnName = {};\n String originalColumnName = {};\n Type columnType = {};\n boolean nullable = {};\n\n ClpColumnHandle() noexcept;\n\n bool operator<(const ColumnHandle& o) const override {\n return columnName < dynamic_cast(o).columnName;\n }\n};\nvoid to_json(json& j, const ClpColumnHandle& p);\nvoid from_json(const json& j, ClpColumnHandle& p);\n} // namespace facebook::presto::protocol::clp", - "struct": true, - "fields": [ - { - "field_type": "String", - "field_name": "columnName", - "field_text": "String", - "_N": 1, - "field_local": true - }, - { - "field_type": "String", - "field_name": "originalColumnName", - "field_text": "String", - "_N": 2, - "field_local": true - }, - { - "field_type": "Type", - "field_name": "columnType", - "field_text": "Type", - "_N": 3, - "field_local": true - }, - { - "field_type": "boolean", - "field_name": "nullable", - "field_text": "bool", - "_N": 4, - "field_local": true - } - ], - "subclass": true, - "super_class": "ColumnHandle", - "json_key": "clp" - }, - { - "class_name": "ClpSplit", - "struct": true, - "fields": [ - { - 
"field_type": "SchemaTableName", - "field_name": "schemaTableName", - "field_text": "SchemaTableName", - "_N": 1, - "field_local": true - }, - { - "field_type": "String", - "field_name": "archivePath", - "field_text": "String", - "_N": 2, - "field_local": true - }, - { - "field_type": "Optional", - "field_name": "query", - "field_text": "String", - "optional": true, - "_N": 3, - "field_local": true - } - ], - "subclass": true, - "super_class": "ConnectorSplit", - "json_key": "clp" - }, - { - "class_name": "ClpTableHandle", - "struct": true, - "fields": [ - { - "field_type": "SchemaTableName", - "field_name": "schemaTableName", - "field_text": "SchemaTableName", - "_N": 1, - "field_local": true - } - ], - "subclass": true, - "super_class": "ConnectorTableHandle", - "json_key": "clp" - }, - { - "class_name": "ClpTableLayoutHandle", - "struct": true, - "fields": [ - { - "field_type": "ClpTableHandle", - "field_name": "table", - "field_text": "ClpTableHandle", - "_N": 1, - "field_local": true - }, - { - "field_type": "Optional", - "field_name": "query", - "field_text": "String", - "optional": true, - "_N": 2, - "field_local": true - } - ], - "subclass": true, - "super_class": "ConnectorTableLayoutHandle", - "json_key": "clp" - } -] diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml deleted file mode 100644 index 0abb104d564e0..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/presto_protocol_clp.yml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -AbstractClasses: - ColumnHandle: - super: JsonEncodedSubclass - comparable: true - subclasses: - - { name: ClpColumnHandle, key: clp } - - ConnectorTableHandle: - super: JsonEncodedSubclass - subclasses: - - { name: ClpTableHandle, key: clp } - - ConnectorTableLayoutHandle: - super: JsonEncodedSubclass - subclasses: - - { name: ClpTableLayoutHandle, key: clp } - - ConnectorSplit: - super: JsonEncodedSubclass - subclasses: - - { name: ClpSplit, key: clp } - -JavaClasses: - - presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java - - presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java - - presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java - - presto-clp/src/main/java/com/yscope/presto/ClpSplit.java diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc deleted file mode 100644 index bb076b8ff23db..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpColumnHandle.hpp.inc +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// ClpColumnHandle is special since it needs an implementation of -// operator<(). - -namespace facebook::presto::protocol::clp { -struct ClpColumnHandle : public ColumnHandle { - String columnName = {}; - String originalColumnName = {}; - Type columnType = {}; - boolean nullable = {}; - - ClpColumnHandle() noexcept; - - bool operator<(const ColumnHandle& o) const override { - return columnName < dynamic_cast(o).columnName; - } -}; -void to_json(json& j, const ClpColumnHandle& p); -void from_json(const json& j, ClpColumnHandle& p); -} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc deleted file mode 100644 index a753f42ab61f1..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.cpp.inc +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// ClpTransactionHandle is special since -// the corresponding class in Java is an enum. - -namespace facebook::presto::protocol::clp { - -void to_json(json& j, const ClpTransactionHandle& p) { - j = json::array(); - j.push_back(p._type); - j.push_back(p.instance); -} - -void from_json(const json& j, ClpTransactionHandle& p) { - j[0].get_to(p._type); - j[1].get_to(p.instance); -} -} // namespace facebook::presto::protocol::clp diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc deleted file mode 100644 index fc873366389eb..0000000000000 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/clp/special/ClpTransactionHandle.hpp.inc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// ClpTransactionHandle is special since -// the corresponding class in Java is an enum. 
- -namespace facebook::presto::protocol::clp { - -struct ClpTransactionHandle : public ConnectorTransactionHandle { - String instance = {}; -}; - -void to_json(json& j, const ClpTransactionHandle& p); - -void from_json(const json& j, ClpTransactionHandle& p); - -} // namespace facebook::presto::protocol::clp From 31816a240a3dd5a5d831b45f7ce0ed855fee43af Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 26 Mar 2025 02:46:23 +0000 Subject: [PATCH 106/126] add row type in clp --- .../java/com/yscope/presto/ClpErrorCode.java | 1 + .../metadata/ClpMySQLMetadataProvider.java | 32 +---- .../yscope/presto/metadata/ClpSchemaTree.java | 133 ++++++++++++++++++ 3 files changed, 137 insertions(+), 29 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java index 90907964173d8..25dcd9fb5192b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java @@ -22,6 +22,7 @@ public enum ClpErrorCode implements ErrorCodeSupplier { + CLP_UNSUPPORTED_TYPE(0, EXTERNAL), CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION(0, EXTERNAL), CLP_UNSUPPORTED_METADATA_SOURCE(1, EXTERNAL), CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL); diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index a3b89842756a2..511b40c898f87 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -67,49 +67,23 @@ private Connection getConnection() throws SQLException return connection; } - // TODO(Rui): Consider move it to a util class - private Type mapColumnType(byte type) - { - switch (ClpNodeType.fromType(type)) { - case 
Integer: - return BigintType.BIGINT; - case Float: - return DoubleType.DOUBLE; - case ClpString: - case VarString: - case DateString: - case NullValue: - return VarcharType.VARCHAR; - case UnstructuredArray: - return new ArrayType(VarcharType.VARCHAR); - case Boolean: - return BooleanType.BOOLEAN; - default: - throw new IllegalArgumentException("Unknown column type: " + type); - } - } - @Override public List listColumnHandles(SchemaTableName schemaTableName) { - List columnHandles = new ArrayList<>(); String query = String.format(QUERY_SELECT_COLUMNS, config.getMetadataTablePrefix(), schemaTableName.getTableName()); - + ClpSchemaTree schemaTree = new ClpSchemaTree(config.isPolymorphicTypeEnabled()); try (Connection connection = getConnection(); PreparedStatement statement = connection.prepareStatement(query)) { try (ResultSet resultSet = statement.executeQuery()) { while (resultSet.next()) { - columnHandles.add(new ClpColumnHandle( - resultSet.getString("name"), - mapColumnType(resultSet.getByte("type")), - true)); + schemaTree.addColumn(resultSet.getString("name"), resultSet.getByte("type")); } } } catch (SQLException e) { log.error("Failed to load table schema for %s: %s" + schemaTableName.getTableName(), e); } - return columnHandles; + return schemaTree.collectColumnHandles(); } @Override diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java new file mode 100644 index 0000000000000..dbf0e3c04ba16 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java @@ -0,0 +1,133 @@ +package com.yscope.presto.metadata; + +import com.facebook.presto.common.type.ArrayType; +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.RowType; +import com.facebook.presto.common.type.Type; +import 
com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.PrestoException; +import com.yscope.presto.ClpColumnHandle; +import com.yscope.presto.ClpErrorCode; +import com.yscope.presto.ClpExpression; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +public class ClpSchemaTree { + static class ClpNode { + Type type; // Only non-null for leaf nodes + Map children = new HashMap<>(); + Set conflictingBaseNames = new HashSet<>(); + + boolean isLeaf() { + return children.isEmpty(); + } + } + + private final ClpNode root; + private final boolean polymorphicTypeEnabled; + ClpSchemaTree(boolean polymorphicTypeEnabled) + { + this.polymorphicTypeEnabled = polymorphicTypeEnabled; + this.root = new ClpNode(); + } + + private Type mapColumnType(byte type) + { + switch (ClpNodeType.fromType(type)) { + case Integer: + return BigintType.BIGINT; + case Float: + return DoubleType.DOUBLE; + case ClpString: + case VarString: + case DateString: + case NullValue: + return VarcharType.VARCHAR; + case UnstructuredArray: + return new ArrayType(VarcharType.VARCHAR); + case Boolean: + return BooleanType.BOOLEAN; + default: + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_TYPE, "Unsupported type: " + type); + } + } + + public void addColumn(String fullName, byte type) { + Type prestoType = mapColumnType(type); + String[] path = fullName.split("\\."); + ClpNode current = root; + + for (int i = 0; i < path.length - 1; i++) { + String segment = path[i]; + current.children.putIfAbsent(segment, new ClpNode()); + current = current.children.get(segment); + } + + String leafName = path[path.length - 1]; + String finalLeafName = leafName; + + if (polymorphicTypeEnabled) { + boolean conflictDetected = false; + + if (current.children.containsKey(leafName)) { + ClpNode existing = current.children.get(leafName); + + if (existing.type != null && 
!existing.type.equals(prestoType)) { + String existingSuffix = existing.type.getDisplayName().toLowerCase(); + String renamedExisting = leafName + "_" + existingSuffix; + + current.children.remove(leafName); + current.children.put(renamedExisting, existing); + + current.conflictingBaseNames.add(leafName); + conflictDetected = true; + } + } else if (current.conflictingBaseNames.contains(leafName)) { + conflictDetected = true; + } + + if (conflictDetected) { + String newSuffix = prestoType.getDisplayName().toLowerCase(); + finalLeafName = leafName + "_" + newSuffix; + } + } + + ClpNode leaf = new ClpNode(); + leaf.type = prestoType; + current.children.put(finalLeafName, leaf); + } + + public List collectColumnHandles() { + List columns = new ArrayList<>(); + for (Map.Entry entry : root.children.entrySet()) { + String name = entry.getKey(); + ClpNode child = entry.getValue(); + if (child.isLeaf()) { + columns.add(new ClpColumnHandle(name, child.type, true)); + } else { + Type rowType = buildRowType(child); + columns.add(new ClpColumnHandle(name, rowType, true)); + } + } + return columns; + } + + private Type buildRowType(ClpNode node) { + List fields = new ArrayList<>(); + for (Map.Entry entry : node.children.entrySet()) { + String name = entry.getKey(); + ClpNode child = entry.getValue(); + Type fieldType = child.isLeaf() ? 
child.type : buildRowType(child); + fields.add(new RowType.Field(Optional.of(name), fieldType)); + } + return RowType.from(fields); + } +} From 2747ec64444484d526b757129d74d89eb294d417 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 28 Mar 2025 13:52:25 +0000 Subject: [PATCH 107/126] add optimizer logic for row type --- .../presto/ClpFilterToKqlConverter.java | 60 +++++++++++++++++++ .../com/yscope/presto/TestClpQueryBase.java | 10 +++- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index fa70355701407..2e6f53e93ee66 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -14,6 +14,7 @@ package com.yscope.presto; import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ColumnHandle; @@ -32,6 +33,7 @@ import io.airlift.slice.Slice; import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -202,6 +204,62 @@ private ClpExpression handleIsNull(SpecialFormExpression node) return new ClpExpression(String.format("NOT %s: *", variableName)); } + private ClpExpression handleDeferenceImpl(RowExpression node) + { + if (node instanceof VariableReferenceExpression) { + return node.accept(this, null); + } + + if (!(node instanceof SpecialFormExpression)) { + return new ClpExpression(node); + } + + SpecialFormExpression specialForm = (SpecialFormExpression) node; + List arguments = specialForm.getArguments(); + if (arguments.size() != 2) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "DEREFERENCE expects 2 arguments"); + } + + RowExpression base = 
arguments.get(0); + RowExpression index = arguments.get(1); + if (!(index instanceof ConstantExpression)) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "DEREFERENCE index must be a constant"); + } + + ConstantExpression constExpr = (ConstantExpression) index; + Object value = constExpr.getValue(); + if (!(value instanceof Long)) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "DEREFERENCE index constant is not a long"); + } + + int fieldIndex = ((Long) value).intValue(); + + Type baseType = base.getType(); + if (!(baseType instanceof RowType)) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "DEREFERENCE base is not a RowType: " + baseType); + } + + RowType rowType = (RowType) baseType; + if (fieldIndex < 0 || fieldIndex >= rowType.getFields().size()) { + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Invalid field index " + fieldIndex + " for RowType: " + rowType); + } + + RowType.Field field = rowType.getFields().get(fieldIndex); + String fieldName = field.getName().orElse("field" + fieldIndex); + + ClpExpression baseString = handleDeferenceImpl(base); + if (!baseString.getDefinition().isPresent()) { + return new ClpExpression(node); + } + return new ClpExpression(baseString.getDefinition() + "." 
+ fieldName); + } + + private ClpExpression handleDereference(SpecialFormExpression expression) + { + return handleDeferenceImpl(expression); + } + // Only handles the case where there is a SQL wildcard in the middle of the string private ClpExpression handleLike(CallExpression node) { @@ -537,6 +595,8 @@ public ClpExpression visitSpecialForm(SpecialFormExpression node, Void context) return handleIn(node); case IS_NULL: return handleIsNull(node); + case DEREFERENCE: + return handleDereference(node); default: return new ClpExpression(node); } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java index 42f5f51860d30..a039dc2056944 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java @@ -15,6 +15,7 @@ import com.facebook.presto.Session; import com.facebook.presto.SystemSessionProperties; +import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.metadata.Metadata; @@ -34,6 +35,7 @@ import com.facebook.presto.sql.tree.Expression; import com.facebook.presto.sql.tree.NodeRef; import com.facebook.presto.testing.TestingSession; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Map; @@ -56,13 +58,15 @@ public class TestClpQueryBase protected static final StandardFunctionResolution standardFunctionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); protected static final Metadata metadata = MetadataManager.createTestMetadataManager(); - protected static ClpColumnHandle regionId = new ClpColumnHandle("region.Id", BIGINT, true); - protected static ClpColumnHandle regionName = new ClpColumnHandle("region.Name", VARCHAR, true); + protected static ClpColumnHandle region = new 
ClpColumnHandle("region", RowType.from(ImmutableList.of( + RowType.field("Id", BIGINT), + RowType.field("Name", VARCHAR) + )), true); protected static ClpColumnHandle city = new ClpColumnHandle("city", VARCHAR, true); protected static final ClpColumnHandle fare = new ClpColumnHandle("fare", DOUBLE, true); protected static final ClpColumnHandle isHoliday = new ClpColumnHandle("isHoliday", BOOLEAN, true); protected static final Map variableToColumnHandleMap = - Stream.of(regionId, regionName, city, fare, isHoliday) + Stream.of(region, city, fare, isHoliday) .collect(toMap( ch -> new VariableReferenceExpression(Optional.empty(), ch.getColumnName(), ch.getColumnType()), ch -> ch)); From 95a0db18c21079fed77315ac25045680dd4571fe Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 28 Mar 2025 16:37:08 +0000 Subject: [PATCH 108/126] update unit test cases for row type --- .../presto/ClpFilterToKqlConverter.java | 14 +++---- .../metadata/ClpMySQLMetadataProvider.java | 6 --- .../yscope/presto/metadata/ClpSchemaTree.java | 42 ++++++++++++++----- .../com/yscope/presto/TestClpMetadata.java | 14 +++---- .../yscope/presto/TestClpPlanOptimizer.java | 30 ++++++------- 5 files changed, 57 insertions(+), 49 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 2e6f53e93ee66..b80d5112c4201 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -252,7 +252,7 @@ private ClpExpression handleDeferenceImpl(RowExpression node) if (!baseString.getDefinition().isPresent()) { return new ClpExpression(node); } - return new ClpExpression(baseString.getDefinition() + "." + fieldName); + return new ClpExpression(baseString.getDefinition().get() + "." 
+ fieldName); } private ClpExpression handleDereference(SpecialFormExpression expression) @@ -332,12 +332,12 @@ private Optional parseSubstringCall(CallExpression callExpression) return Optional.empty(); } - RowExpression arg0 = callExpression.getArguments().get(0); - if (!(arg0 instanceof VariableReferenceExpression)) { + ClpExpression variable = callExpression.getArguments().get(0).accept(this, null); + if (!variable.getDefinition().isPresent()) { return Optional.empty(); } - String varName = getVariableName((VariableReferenceExpression) arg0); + String varName = variable.getDefinition().get(); RowExpression startExpression = callExpression.getArguments().get(1); RowExpression lengthExpression = null; if (argCount == 3) { @@ -518,15 +518,13 @@ private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression return new ClpExpression(node); } - boolean leftIsVariable = (left instanceof VariableReferenceExpression); - boolean rightIsVariable = (right instanceof VariableReferenceExpression); boolean leftIsConstant = (left instanceof ConstantExpression); boolean rightIsConstant = (right instanceof ConstantExpression); Type leftType = left.getType(); Type rightType = right.getType(); - if (leftIsVariable && rightIsConstant) { + if (rightIsConstant) { return buildClpExpression( leftDefinition.get(), // variable rightDefinition.get(), // literal @@ -534,7 +532,7 @@ private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression rightType, node); } - else if (leftIsConstant && rightIsVariable) { + else if (leftIsConstant) { OperatorType newOperator = OperatorType.flip(operator); return buildClpExpression( rightDefinition.get(), // variable diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index 511b40c898f87..9974c716100bf 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -14,12 +14,6 @@ package com.yscope.presto.metadata; import com.facebook.airlift.log.Logger; -import com.facebook.presto.common.type.ArrayType; -import com.facebook.presto.common.type.BigintType; -import com.facebook.presto.common.type.BooleanType; -import com.facebook.presto.common.type.DoubleType; -import com.facebook.presto.common.type.Type; -import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.SchemaTableName; import com.yscope.presto.ClpColumnHandle; import com.yscope.presto.ClpConfig; diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java index dbf0e3c04ba16..4256497d43dea 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java @@ -1,3 +1,16 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.yscope.presto.metadata; import com.facebook.presto.common.type.ArrayType; @@ -10,7 +23,6 @@ import com.facebook.presto.spi.PrestoException; import com.yscope.presto.ClpColumnHandle; import com.yscope.presto.ClpErrorCode; -import com.yscope.presto.ClpExpression; import java.util.ArrayList; import java.util.HashMap; @@ -20,13 +32,16 @@ import java.util.Optional; import java.util.Set; -public class ClpSchemaTree { - static class ClpNode { +public class ClpSchemaTree +{ + static class ClpNode + { Type type; // Only non-null for leaf nodes Map children = new HashMap<>(); Set conflictingBaseNames = new HashSet<>(); - boolean isLeaf() { + boolean isLeaf() + { return children.isEmpty(); } } @@ -60,7 +75,8 @@ private Type mapColumnType(byte type) } } - public void addColumn(String fullName, byte type) { + public void addColumn(String fullName, byte type) + { Type prestoType = mapColumnType(type); String[] path = fullName.split("\\."); ClpNode current = root; @@ -81,7 +97,7 @@ public void addColumn(String fullName, byte type) { ClpNode existing = current.children.get(leafName); if (existing.type != null && !existing.type.equals(prestoType)) { - String existingSuffix = existing.type.getDisplayName().toLowerCase(); + String existingSuffix = existing.type.getDisplayName(); String renamedExisting = leafName + "_" + existingSuffix; current.children.remove(leafName); @@ -90,12 +106,13 @@ public void addColumn(String fullName, byte type) { current.conflictingBaseNames.add(leafName); conflictDetected = true; } - } else if (current.conflictingBaseNames.contains(leafName)) { + } + else if (current.conflictingBaseNames.contains(leafName)) { conflictDetected = true; } if (conflictDetected) { - String newSuffix = prestoType.getDisplayName().toLowerCase(); + String newSuffix = prestoType.getDisplayName(); finalLeafName = leafName + "_" + newSuffix; } } @@ -105,14 +122,16 @@ public void addColumn(String fullName, byte type) { current.children.put(finalLeafName, leaf); } - 
public List collectColumnHandles() { + public List collectColumnHandles() + { List columns = new ArrayList<>(); for (Map.Entry entry : root.children.entrySet()) { String name = entry.getKey(); ClpNode child = entry.getValue(); if (child.isLeaf()) { columns.add(new ClpColumnHandle(name, child.type, true)); - } else { + } + else { Type rowType = buildRowType(child); columns.add(new ClpColumnHandle(name, rowType, true)); } @@ -120,7 +139,8 @@ public List collectColumnHandles() { return columns; } - private Type buildRowType(ClpNode node) { + private Type buildRowType(ClpNode node) + { List fields = new ArrayList<>(); for (Map.Entry entry : node.children.entrySet()) { String name = entry.getKey(); diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index 303aa3c206186..e8e0b9d1f54a0 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -16,6 +16,7 @@ import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorTableMetadata; @@ -105,7 +106,6 @@ public void setUp() new Pair<>("a", ClpNodeType.VarString), new Pair<>("b", ClpNodeType.Float), new Pair<>("b", ClpNodeType.ClpString), - new Pair<>("c", ClpNodeType.Float), new Pair<>("c.d", ClpNodeType.Boolean), new Pair<>("c.e", ClpNodeType.VarString)); @@ -179,16 +179,12 @@ public void testGetTableMetadata() .setNullable(true) .build()); columnMetadata.add(ColumnMetadata.builder() - .setName("c.d") - .setType(BooleanType.BOOLEAN) + .setName("c") + .setType(RowType.from(ImmutableList.of( + RowType.field("d", BooleanType.BOOLEAN), + RowType.field("e", 
VarcharType.VARCHAR)))) .setNullable(true) .build()); - columnMetadata.add(ColumnMetadata.builder() - .setName("c.e") - .setType(VarcharType.VARCHAR) - .setNullable(true) - .build()); - columnMetadata.add(ColumnMetadata.builder().setName("c").setType(DoubleType.DOUBLE).setNullable(true).build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index 3f64532ee14e8..6f6418633bf1d 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -119,15 +119,15 @@ public void testOrPushdown() testFilter("fare > 0 OR city like 'b%'", Optional.of("(fare > 0 OR city: \"b*\")"), Optional.empty(), sessionHolder); - testFilter("lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1", Optional.empty(), Optional.of("(lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1)"), + testFilter("lower(region.Name) = 'hello world' OR region.Id != 1", Optional.empty(), Optional.of("(lower(region.Name) = 'hello world' OR region.Id != 1)"), sessionHolder); // Multiple ORs - testFilter("fare > 0 OR city like 'b%' OR lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1", + testFilter("fare > 0 OR city like 'b%' OR lower(region.Name) = 'hello world' OR region.Id != 1", Optional.empty(), - Optional.of("fare > 0 OR city like 'b%' OR lower(\"region.Name\") = 'hello world' OR \"region.Id\" != 1"), + Optional.of("fare > 0 OR city like 'b%' OR lower(region.Name) = 'hello world' OR region.Id != 1"), sessionHolder); - testFilter("fare > 0 OR city like 'b%' OR \"region.Id\" != 1", + testFilter("fare > 0 OR city like 'b%' OR region.Id != 1", Optional.of("((fare > 0 OR city: \"b*\") OR NOT region.Id: 1)"), Optional.empty(), sessionHolder); @@ -139,17 +139,17 @@ public void testAndPushdown() SessionHolder 
sessionHolder = new SessionHolder(); testFilter("fare > 0 AND city like 'b%'", Optional.of("(fare > 0 AND city: \"b*\")"), Optional.empty(), sessionHolder); - testFilter("lower(\"region.Name\") = 'hello world' AND \"region.Id\" != 1", Optional.of("(NOT region.Id: 1)"), Optional.of("lower(\"region.Name\") = 'hello world'"), + testFilter("lower(region.Name) = 'hello world' AND region.Id != 1", Optional.of("(NOT region.Id: 1)"), Optional.of("lower(region.Name) = 'hello world'"), sessionHolder); // Multiple ANDs - testFilter("fare > 0 AND city like 'b%' AND lower(\"region.Name\") = 'hello world' AND \"region.Id\" != 1", + testFilter("fare > 0 AND city like 'b%' AND lower(region.Name) = 'hello world' AND region.Id != 1", Optional.of("(((fare > 0 AND city: \"b*\")) AND NOT region.Id: 1)"), - Optional.of("(lower(\"region.Name\") = 'hello world')"), + Optional.of("(lower(region.Name) = 'hello world')"), sessionHolder); - testFilter("fare > 0 AND city like '%b%' AND lower(\"region.Name\") = 'hello world' AND \"region.Id\" != 1", + testFilter("fare > 0 AND city like '%b%' AND lower(region.Name) = 'hello world' AND region.Id != 1", Optional.of("(((fare > 0)) AND NOT region.Id: 1)"), - Optional.of("city like '%b%' AND lower(\"region.Name\") = 'hello world'"), + Optional.of("city like '%b%' AND lower(region.Name) = 'hello world'"), sessionHolder); } @@ -158,8 +158,8 @@ public void testNotPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("\"region.Name\" NOT LIKE 'hello%'", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); - testFilter("NOT (\"region.Name\" LIKE 'hello%')", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("region.Name NOT LIKE 'hello%'", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("NOT (region.Name LIKE 'hello%')", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); testFilter("city != 'hello 
world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); testFilter("city <> 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); testFilter("NOT (city = 'hello world')", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); @@ -194,13 +194,13 @@ public void testComplexPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("(fare > 0 OR city like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", + testFilter("(fare > 0 OR city like 'b%') AND (lower(region.Name) = 'hello world' OR city IS NULL)", Optional.of("((fare > 0 OR city: \"b*\"))"), - Optional.of("(lower(\"region.Name\") = 'hello world' OR city IS NULL)"), + Optional.of("(lower(region.Name) = 'hello world' OR city IS NULL)"), sessionHolder); - testFilter("\"region.Id\" = 1 AND (fare > 0 OR city not like 'b%') AND (lower(\"region.Name\") = 'hello world' OR city IS NULL)", + testFilter("region.Id = 1 AND (fare > 0 OR city not like 'b%') AND (lower(region.Name) = 'hello world' OR city IS NULL)", Optional.of("((region.Id: 1 AND (fare > 0 OR NOT city: \"b*\")))"), - Optional.of("lower(\"region.Name\") = 'hello world' OR city IS NULL"), + Optional.of("lower(region.Name) = 'hello world' OR city IS NULL"), sessionHolder); } } From 3dbedd2b473ba527ecb640f9459599e141e49662 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 28 Mar 2025 16:41:50 +0000 Subject: [PATCH 109/126] revert lowercase change back --- .../java/com/facebook/presto/metadata/MetadataManager.java | 3 +-- .../main/java/com/facebook/presto/spi/ColumnMetadata.java | 4 ++-- .../main/java/com/facebook/presto/spi/VariableAllocator.java | 5 +++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java b/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java index cc4a1e4c5bc40..8a2b1ce55dfc8 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java +++ b/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java @@ -511,8 +511,7 @@ public Map getColumnHandles(Session session, TableHandle t ImmutableMap.Builder map = ImmutableMap.builder(); for (Entry mapEntry : handles.entrySet()) { -// map.put(mapEntry.getKey().toLowerCase(ENGLISH), mapEntry.getValue()); - map.put(mapEntry.getKey(), mapEntry.getValue()); + map.put(mapEntry.getKey().toLowerCase(ENGLISH), mapEntry.getValue()); } return map.build(); } diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java b/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java index a25dd41b10b04..8d26f39cd0f8e 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/ColumnMetadata.java @@ -25,6 +25,7 @@ import static com.facebook.presto.spi.SchemaUtil.checkNotEmpty; import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; +import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; public class ColumnMetadata @@ -79,8 +80,7 @@ public ColumnMetadata(String name, Type type, boolean nullable, String comment, requireNonNull(type, "type is null"); requireNonNull(properties, "properties is null"); -// this.name = name.toLowerCase(ENGLISH); - this.name = name; + this.name = name.toLowerCase(ENGLISH); this.type = type; this.comment = comment; this.extraInfo = extraInfo; diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java b/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java index fd8ba13174526..edf6d015d5ba8 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/VariableAllocator.java @@ -27,11 +27,12 @@ import java.util.stream.Collectors; import static 
java.util.Collections.unmodifiableMap; +import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; public class VariableAllocator { - protected static final Pattern DISALLOWED_CHAR_PATTERN = Pattern.compile("[^.a-zA-Z0-9_\\-$]+"); + protected static final Pattern DISALLOWED_CHAR_PATTERN = Pattern.compile("[^a-zA-Z0-9_\\-$]+"); protected final Map variables; protected int nextId; @@ -97,7 +98,7 @@ public VariableReferenceExpression newVariable(Optional sourceLo requireNonNull(type, "type is null"); // TODO: workaround for the fact that QualifiedName lowercases parts -// nameHint = nameHint.toLowerCase(ENGLISH); + nameHint = nameHint.toLowerCase(ENGLISH); // don't strip the tail if the only _ is the first character int index = nameHint.lastIndexOf("_"); From 4b6fd826b3bdb5406d6bda534d04bc2e1531968a Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 28 Mar 2025 20:56:36 +0000 Subject: [PATCH 110/126] support row type for more expressions --- .../main/java/com/yscope/presto/ClpErrorCode.java | 4 ++-- .../com/yscope/presto/ClpFilterToKqlConverter.java | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java index 25dcd9fb5192b..e19e5ef2fa41c 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java @@ -22,10 +22,10 @@ public enum ClpErrorCode implements ErrorCodeSupplier { - CLP_UNSUPPORTED_TYPE(0, EXTERNAL), CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION(0, EXTERNAL), CLP_UNSUPPORTED_METADATA_SOURCE(1, EXTERNAL), - CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL); + CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL), + CLP_UNSUPPORTED_TYPE(3, EXTERNAL); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index b80d5112c4201..4a1e4a8044c5d 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -159,10 +159,11 @@ private ClpExpression handleOr(SpecialFormExpression node) private ClpExpression handleIn(SpecialFormExpression node) { - if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { + ClpExpression variable = node.getArguments().get(0).accept(this, null); + if (!variable.getDefinition().isPresent()) { return new ClpExpression(node); } - String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); + String variableName = variable.getDefinition().get(); StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("("); for (RowExpression argument : node.getArguments().subList(1, node.getArguments().size())) { @@ -191,10 +192,6 @@ private ClpExpression handleIsNull(SpecialFormExpression node) "IS NULL operator must have exactly one argument. Received: " + node); } - if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { - return new ClpExpression(node); - } - ClpExpression expression = node.getArguments().get(0).accept(this, null); if (!expression.getDefinition().isPresent()) { return new ClpExpression(node); @@ -267,11 +264,12 @@ private ClpExpression handleLike(CallExpression node) throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "LIKE operator must have exactly two arguments. 
Received: " + node); } - if (!(node.getArguments().get(0) instanceof VariableReferenceExpression)) { + ClpExpression variable = node.getArguments().get(0).accept(this, null); + if (!variable.getDefinition().isPresent()) { return new ClpExpression(node); } - String variableName = getVariableName((VariableReferenceExpression) node.getArguments().get(0)); + String variableName = variable.getDefinition().get(); RowExpression argument = node.getArguments().get(1); String pattern; From 9a22e2a36f4a7abd642d2c2c7b33398707ec7642 Mon Sep 17 00:00:00 2001 From: wraymo Date: Sat, 29 Mar 2025 02:22:34 +0000 Subject: [PATCH 111/126] test nested types and array case --- .../com/yscope/presto/TestClpMetadata.java | 12 ++- .../yscope/presto/TestClpPlanOptimizer.java | 92 +++++++++---------- .../com/yscope/presto/TestClpQueryBase.java | 13 +-- 3 files changed, 64 insertions(+), 53 deletions(-) diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index e8e0b9d1f54a0..fc2dfc58ff118 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -13,6 +13,7 @@ */ package com.yscope.presto; +import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; import com.facebook.presto.common.type.DoubleType; @@ -107,7 +108,8 @@ public void setUp() new Pair<>("b", ClpNodeType.Float), new Pair<>("b", ClpNodeType.ClpString), new Pair<>("c.d", ClpNodeType.Boolean), - new Pair<>("c.e", ClpNodeType.VarString)); + new Pair<>("c.e", ClpNodeType.VarString), + new Pair<>("f.g.h", ClpNodeType.UnstructuredArray)); try (PreparedStatement pstmt = conn.prepareStatement(insertColumnMetadataSQL)) { for (Pair record : records) { @@ -185,6 +187,14 @@ public void testGetTableMetadata() RowType.field("e", VarcharType.VARCHAR)))) .setNullable(true) 
.build()); + columnMetadata.add(ColumnMetadata.builder() + .setName("f") + .setType(RowType.from(ImmutableList.of( + RowType.field("g", + RowType.from(ImmutableList.of( + RowType.field("h", new ArrayType(VarcharType.VARCHAR)))))))) + .setNullable(true) + .build()); assertEquals(columnMetadata, new HashSet<>(tableMetadata.getColumns())); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index 6f6418633bf1d..18539f6fc9ae6 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -57,23 +57,23 @@ public void testStringMatchPushdown() SessionHolder sessionHolder = new SessionHolder(); // Exact match - testFilter("city = 'hello world'", Optional.of("city: \"hello world\""), Optional.empty(), sessionHolder); - testFilter("'hello world' = city", Optional.of("city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("city.Name = 'hello world'", Optional.of("city.Name: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("'hello world' = city.Name", Optional.of("city.Name: \"hello world\""), Optional.empty(), sessionHolder); // Like predicates that are transformed into substring match - testFilter("city like 'hello%'", Optional.of("city: \"hello*\""), Optional.empty(), sessionHolder); - testFilter("city like '%hello'", Optional.of("city: \"*hello\""), Optional.empty(), sessionHolder); + testFilter("city.Name like 'hello%'", Optional.of("city.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("city.Name like '%hello'", Optional.of("city.Name: \"*hello\""), Optional.empty(), sessionHolder); // Like predicates that are transformed into CARDINALITY(SPLIT(x, 'some string', 2)) = 2 form, and they are not pushed down for now - testFilter("city like '%hello%'", Optional.empty(), Optional.of("city like '%hello%'"), sessionHolder); + 
testFilter("city.Name like '%hello%'", Optional.empty(), Optional.of("city.Name like '%hello%'"), sessionHolder); // Like predicates that are kept in the original forms - testFilter("city like 'hello_'", Optional.of("city: \"hello?\""), Optional.empty(), sessionHolder); - testFilter("city like '_hello'", Optional.of("city: \"?hello\""), Optional.empty(), sessionHolder); - testFilter("city like 'hello_w%'", Optional.of("city: \"hello?w*\""), Optional.empty(), sessionHolder); - testFilter("city like '%hello_w'", Optional.of("city: \"*hello?w\""), Optional.empty(), sessionHolder); - testFilter("city like 'hello%world'", Optional.of("city: \"hello*world\""), Optional.empty(), sessionHolder); - testFilter("city like 'hello%wor%ld'", Optional.of("city: \"hello*wor*ld\""), Optional.empty(), sessionHolder); + testFilter("city.Name like 'hello_'", Optional.of("city.Name: \"hello?\""), Optional.empty(), sessionHolder); + testFilter("city.Name like '_hello'", Optional.of("city.Name: \"?hello\""), Optional.empty(), sessionHolder); + testFilter("city.Name like 'hello_w%'", Optional.of("city.Name: \"hello?w*\""), Optional.empty(), sessionHolder); + testFilter("city.Name like '%hello_w'", Optional.of("city.Name: \"*hello?w\""), Optional.empty(), sessionHolder); + testFilter("city.Name like 'hello%world'", Optional.of("city.Name: \"hello*world\""), Optional.empty(), sessionHolder); + testFilter("city.Name like 'hello%wor%ld'", Optional.of("city.Name: \"hello*wor*ld\""), Optional.empty(), sessionHolder); } @Test @@ -81,14 +81,14 @@ public void testSubStringPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("substr(city, 1, 2) = 'he'", Optional.of("city: \"he*\""), Optional.empty(), sessionHolder); - testFilter("substr(city, 5, 2) = 'he'", Optional.of("city: \"????he*\""), Optional.empty(), sessionHolder); - testFilter("substr(city, 5) = 'he'", Optional.of("city: \"????he\""), Optional.empty(), sessionHolder); - testFilter("substr(city, -2) = 'he'", 
Optional.of("city: \"*he\""), Optional.empty(), sessionHolder); + testFilter("substr(city.Name, 1, 2) = 'he'", Optional.of("city.Name: \"he*\""), Optional.empty(), sessionHolder); + testFilter("substr(city.Name, 5, 2) = 'he'", Optional.of("city.Name: \"????he*\""), Optional.empty(), sessionHolder); + testFilter("substr(city.Name, 5) = 'he'", Optional.of("city.Name: \"????he\""), Optional.empty(), sessionHolder); + testFilter("substr(city.Name, -2) = 'he'", Optional.of("city.Name: \"*he\""), Optional.empty(), sessionHolder); // Invalid substring index is not pushed down - testFilter("substr(city, 1, 5) = 'he'", Optional.empty(), Optional.of("substr(city, 1, 5) = 'he'"), sessionHolder); - testFilter("substr(city, -5) = 'he'", Optional.empty(), Optional.of("substr(city, -5) = 'he'"), sessionHolder); + testFilter("substr(city.Name, 1, 5) = 'he'", Optional.empty(), Optional.of("substr(city.Name, 1, 5) = 'he'"), sessionHolder); + testFilter("substr(city.Name, -5) = 'he'", Optional.empty(), Optional.of("substr(city.Name, -5) = 'he'"), sessionHolder); } @Test @@ -117,18 +117,18 @@ public void testOrPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("fare > 0 OR city like 'b%'", Optional.of("(fare > 0 OR city: \"b*\")"), Optional.empty(), + testFilter("fare > 0 OR city.Name like 'b%'", Optional.of("(fare > 0 OR city.Name: \"b*\")"), Optional.empty(), sessionHolder); - testFilter("lower(region.Name) = 'hello world' OR region.Id != 1", Optional.empty(), Optional.of("(lower(region.Name) = 'hello world' OR region.Id != 1)"), + testFilter("lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1", Optional.empty(), Optional.of("(lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1)"), sessionHolder); // Multiple ORs - testFilter("fare > 0 OR city like 'b%' OR lower(region.Name) = 'hello world' OR region.Id != 1", + testFilter("fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1", 
Optional.empty(), - Optional.of("fare > 0 OR city like 'b%' OR lower(region.Name) = 'hello world' OR region.Id != 1"), + Optional.of("fare > 0 OR city.Name like 'b%' OR lower(city.Region.Name) = 'hello world' OR city.Region.Id != 1"), sessionHolder); - testFilter("fare > 0 OR city like 'b%' OR region.Id != 1", - Optional.of("((fare > 0 OR city: \"b*\") OR NOT region.Id: 1)"), + testFilter("fare > 0 OR city.Name like 'b%' OR city.Region.Id != 1", + Optional.of("((fare > 0 OR city.Name: \"b*\") OR NOT city.Region.Id: 1)"), Optional.empty(), sessionHolder); } @@ -138,18 +138,18 @@ public void testAndPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("fare > 0 AND city like 'b%'", Optional.of("(fare > 0 AND city: \"b*\")"), Optional.empty(), sessionHolder); - testFilter("lower(region.Name) = 'hello world' AND region.Id != 1", Optional.of("(NOT region.Id: 1)"), Optional.of("lower(region.Name) = 'hello world'"), + testFilter("fare > 0 AND city.Name like 'b%'", Optional.of("(fare > 0 AND city.Name: \"b*\")"), Optional.empty(), sessionHolder); + testFilter("lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", Optional.of("(NOT city.Region.Id: 1)"), Optional.of("lower(city.Region.Name) = 'hello world'"), sessionHolder); // Multiple ANDs - testFilter("fare > 0 AND city like 'b%' AND lower(region.Name) = 'hello world' AND region.Id != 1", - Optional.of("(((fare > 0 AND city: \"b*\")) AND NOT region.Id: 1)"), - Optional.of("(lower(region.Name) = 'hello world')"), + testFilter("fare > 0 AND city.Name like 'b%' AND lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", + Optional.of("(((fare > 0 AND city.Name: \"b*\")) AND NOT city.Region.Id: 1)"), + Optional.of("(lower(city.Region.Name) = 'hello world')"), sessionHolder); - testFilter("fare > 0 AND city like '%b%' AND lower(region.Name) = 'hello world' AND region.Id != 1", - Optional.of("(((fare > 0)) AND NOT region.Id: 1)"), - Optional.of("city like '%b%' AND lower(region.Name) 
= 'hello world'"), + testFilter("fare > 0 AND city.Name like '%b%' AND lower(city.Region.Name) = 'hello world' AND city.Region.Id != 1", + Optional.of("(((fare > 0)) AND NOT city.Region.Id: 1)"), + Optional.of("city.Name like '%b%' AND lower(city.Region.Name) = 'hello world'"), sessionHolder); } @@ -158,18 +158,18 @@ public void testNotPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("region.Name NOT LIKE 'hello%'", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); - testFilter("NOT (region.Name LIKE 'hello%')", Optional.of("NOT region.Name: \"hello*\""), Optional.empty(), sessionHolder); - testFilter("city != 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); - testFilter("city <> 'hello world'", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); - testFilter("NOT (city = 'hello world')", Optional.of("NOT city: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("city.Region.Name NOT LIKE 'hello%'", Optional.of("NOT city.Region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("NOT (city.Region.Name LIKE 'hello%')", Optional.of("NOT city.Region.Name: \"hello*\""), Optional.empty(), sessionHolder); + testFilter("city.Name != 'hello world'", Optional.of("NOT city.Name: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("city.Name <> 'hello world'", Optional.of("NOT city.Name: \"hello world\""), Optional.empty(), sessionHolder); + testFilter("NOT (city.Name = 'hello world')", Optional.of("NOT city.Name: \"hello world\""), Optional.empty(), sessionHolder); testFilter("fare != 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); testFilter("fare <> 0", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); testFilter("NOT (fare = 0)", Optional.of("NOT fare: 0"), Optional.empty(), sessionHolder); // Multiple NOTs testFilter("NOT (NOT fare = 0)", Optional.of("NOT NOT fare: 0"), 
Optional.empty(), sessionHolder); - testFilter("NOT (fare = 0 AND city = 'hello world')", Optional.of("NOT (fare: 0 AND city: \"hello world\")"), Optional.empty(), sessionHolder); + testFilter("NOT (fare = 0 AND city.Name = 'hello world')", Optional.of("NOT (fare: 0 AND city.Name: \"hello world\")"), Optional.empty(), sessionHolder); } @Test @@ -177,7 +177,7 @@ public void testInPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("city IN ('hello world', 'hello world 2')", Optional.of("(city: \"hello world\" OR city: \"hello world 2\")"), Optional.empty(), sessionHolder); + testFilter("city.Name IN ('hello world', 'hello world 2')", Optional.of("(city.Name: \"hello world\" OR city.Name: \"hello world 2\")"), Optional.empty(), sessionHolder); } @Test @@ -185,8 +185,8 @@ public void testIsNullPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("city IS NULL", Optional.of("NOT city: *"), Optional.empty(), sessionHolder); - testFilter("city IS NOT NULL", Optional.of("NOT NOT city: *"), Optional.empty(), sessionHolder); + testFilter("city.Name IS NULL", Optional.of("NOT city.Name: *"), Optional.empty(), sessionHolder); + testFilter("city.Name IS NOT NULL", Optional.of("NOT NOT city.Name: *"), Optional.empty(), sessionHolder); } @Test @@ -194,13 +194,13 @@ public void testComplexPushdown() { SessionHolder sessionHolder = new SessionHolder(); - testFilter("(fare > 0 OR city like 'b%') AND (lower(region.Name) = 'hello world' OR city IS NULL)", - Optional.of("((fare > 0 OR city: \"b*\"))"), - Optional.of("(lower(region.Name) = 'hello world' OR city IS NULL)"), + testFilter("(fare > 0 OR city.Name like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", + Optional.of("((fare > 0 OR city.Name: \"b*\"))"), + Optional.of("(lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)"), sessionHolder); - testFilter("region.Id = 1 AND (fare > 0 OR city not like 'b%') AND (lower(region.Name) = 'hello world' 
OR city IS NULL)", - Optional.of("((region.Id: 1 AND (fare > 0 OR NOT city: \"b*\")))"), - Optional.of("lower(region.Name) = 'hello world' OR city IS NULL"), + testFilter("city.Region.Id = 1 AND (fare > 0 OR city.Name not like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", + Optional.of("((city.Region.Id: 1 AND (fare > 0 OR NOT city.Name: \"b*\")))"), + Optional.of("lower(city.Region.Name) = 'hello world' OR city.Name IS NULL"), sessionHolder); } } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java index a039dc2056944..8a73c0788a263 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java @@ -58,15 +58,16 @@ public class TestClpQueryBase protected static final StandardFunctionResolution standardFunctionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); protected static final Metadata metadata = MetadataManager.createTestMetadataManager(); - protected static ClpColumnHandle region = new ClpColumnHandle("region", RowType.from(ImmutableList.of( - RowType.field("Id", BIGINT), - RowType.field("Name", VARCHAR) - )), true); - protected static ClpColumnHandle city = new ClpColumnHandle("city", VARCHAR, true); + protected static ClpColumnHandle city = new ClpColumnHandle("city", RowType.from(ImmutableList.of( + RowType.field("Name", VARCHAR), + RowType.field("Region", RowType.from(ImmutableList.of( + RowType.field("Id", BIGINT), + RowType.field("Name", VARCHAR) + ))))), true); protected static final ClpColumnHandle fare = new ClpColumnHandle("fare", DOUBLE, true); protected static final ClpColumnHandle isHoliday = new ClpColumnHandle("isHoliday", BOOLEAN, true); protected static final Map variableToColumnHandleMap = - Stream.of(region, city, fare, isHoliday) + Stream.of(city, fare, isHoliday) .collect(toMap( ch -> new 
VariableReferenceExpression(Optional.empty(), ch.getColumnName(), ch.getColumnType()), ch -> ch)); From 62536507b608d09c20659f94b5039cd2b9664a2a Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 31 Mar 2025 00:40:11 +0000 Subject: [PATCH 112/126] improve clp docs --- presto-docs/src/main/sphinx/connector/clp.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index bb77b064b3abd..609442ce7dac4 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -168,6 +168,12 @@ CLP Type Presto Type (others) (unsupported) ====================== ==================== +String Types +^^^^^^^^^^^^ + +In CLP, we have three distinct string types: ``ClpString`` (strings with whitespace), ``VarString`` (strings without +whitespace), and ``DateString`` (strings representing dates). Currently, all three are mapped to Presto's ``VARCHAR`` +type. Array Types ^^^^^^^^^^^ From 0201632ff3e8fc88c665bc2dbea8a3cc13d454b9 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 31 Mar 2025 01:05:01 +0000 Subject: [PATCH 113/126] fix sql injection issue --- .../src/main/java/com/yscope/presto/ClpConfig.java | 10 ++++++++++ .../main/java/com/yscope/presto/ClpErrorCode.java | 3 ++- .../presto/metadata/ClpMySQLMetadataProvider.java | 13 ++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 999824adc026a..4c72549ed6aa9 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -14,6 +14,9 @@ package com.yscope.presto; import com.facebook.airlift.configuration.Config; +import com.facebook.presto.spi.PrestoException; + +import java.util.regex.Pattern; public class ClpConfig { @@ -49,6 +52,8 @@ public enum SplitSource // TODO(Rui): We 
need to add it in the example configuration files and in Velox private SplitSource splitSource = SplitSource.MYSQL; + public static final Pattern SAFE_SQL_IDENTIFIER = Pattern.compile("^[a-zA-Z0-9_]+$"); + public boolean isPolymorphicTypeEnabled() { return polymorphicTypeEnabled; @@ -129,6 +134,11 @@ public String getMetadataTablePrefix() @Config("clp.metadata-table-prefix") public ClpConfig setMetadataTablePrefix(String metadataTablePrefix) { + if (metadataTablePrefix == null || !SAFE_SQL_IDENTIFIER.matcher(metadataTablePrefix).matches()) { + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_CONFIG_OPTION, "Invalid metadataTablePrefix: " + + metadataTablePrefix + ". Only alphanumeric characters and underscores are allowed."); + } + this.metadataTablePrefix = metadataTablePrefix; return this; } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java index e19e5ef2fa41c..52ef0675fecd4 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java @@ -25,7 +25,8 @@ public enum ClpErrorCode CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION(0, EXTERNAL), CLP_UNSUPPORTED_METADATA_SOURCE(1, EXTERNAL), CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL), - CLP_UNSUPPORTED_TYPE(3, EXTERNAL); + CLP_UNSUPPORTED_TYPE(3, EXTERNAL), + CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java index 9974c716100bf..9c1b75ffd66a8 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java @@ -61,6 +61,11 @@ private Connection getConnection() throws SQLException return connection; } + private boolean isValidIdentifier(String 
identifier) + { + return identifier != null && ClpConfig.SAFE_SQL_IDENTIFIER.matcher(identifier).matches(); + } + @Override public List listColumnHandles(SchemaTableName schemaTableName) { @@ -90,7 +95,13 @@ public List listTableNames(String schema) Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(query)) { while (resultSet.next()) { - tableNames.add(resultSet.getString("table_name")); + String tableName = resultSet.getString("table_name"); + if (isValidIdentifier(tableName)) { + tableNames.add(tableName); + } + else { + log.warn("Ignoring invalid table name found in metadata: %s", tableName); + } } } catch (SQLException e) { From baa1097467ccbe535d1e84ae2055a465faf8611a Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 31 Mar 2025 01:35:19 +0000 Subject: [PATCH 114/126] add row type to the doc --- presto-docs/src/main/sphinx/connector/clp.rst | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index 609442ce7dac4..11304a8668d84 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -165,6 +165,7 @@ CLP Type Presto Type ``DateString`` ``VARCHAR`` ``Boolean`` ``BOOLEAN`` ``UnstructuredArray`` ``ARRAY(VARCHAR)`` +``Object`` ``ROW`` (others) (unsupported) ====================== ==================== @@ -179,9 +180,38 @@ Array Types ^^^^^^^^^^^ CLP supports two array types: ``UnstructuredArray`` and ``StructuredArray``. Unstructured arrays are stored as strings -in CLP and elements can be any type. However, in Presto arrays are homogenous, so the elements are converted to strings +in CLP and elements can be any type. However, in Presto arrays are homogeneous, so the elements are converted to strings when read. ``StructuredArray`` type is not supported yet. 
+Object Types +^^^^^^^^^^^^ + +CLP stores metadata using a tree structure where internal nodes may represent objects containing nested fields as their +children. In Presto, we represent internal object nodes specifically using the ``ROW`` data type, mapping each child +node as a sub-field within the ``ROW``. + + +For example, consider the JSON log: + +.. code-block:: json + + { + "msg": { + "ts": 0, + "status": "ok" + } + } + +In CLP's schema tree, ``msg`` is an internal object node with two child nodes: ``ts`` and ``status``. In Presto, we map +the ``msg`` node to a ``ROW`` type: + +.. code-block:: sql + + ROW(ts BIGINT, status VARCHAR) + +Here, the child nodes ``ts`` and ``status`` become fields within the ``ROW``, clearly reflecting the nested structure of +the original JSON. + SQL support ----------- From a80ad572ba281ca8b2445f663abac937f57f25e7 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 31 Mar 2025 13:50:18 +0000 Subject: [PATCH 115/126] improve the docs --- presto-docs/src/main/sphinx/connector/clp.rst | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index 11304a8668d84..f37f6eba7df1a 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -185,13 +185,13 @@ when read. ``StructuredArray`` type is not supported yet. Object Types ^^^^^^^^^^^^ +CLP stores metadata using a global schema tree structure that captures all possible fields from various log structures. +Internal nodes may represent objects containing nested fields as their children. In Presto, we map these internal object +nodes specifically to the ``ROW`` data type, including all subfields as fields within the ``ROW``. -CLP stores metadata using a tree structure where internal nodes may represent objects containing nested fields as their -children. 
In Presto, we represent internal object nodes specifically using the ``ROW`` data type, mapping each child -node as a sub-field within the ``ROW``. +For instance, consider a table containing two distinct JSON log types: - -For example, consider the JSON log: +Log Type 1: .. code-block:: json @@ -202,15 +202,30 @@ For example, consider the JSON log: } } -In CLP's schema tree, ``msg`` is an internal object node with two child nodes: ``ts`` and ``status``. In Presto, we map -the ``msg`` node to a ``ROW`` type: +Log Type 2: + +.. code-block:: json + + { + "msg": { + "ts": 1, + "status": "error", + "thread_num": 4, + "backtrace": "" + } + } + +In CLP's schema tree, these two structures are combined into a unified internal node (``msg``) with four child nodes: +``ts``, ``status``, ``thread_num`` and ``backtrace``. In Presto, we represent this combined structure using the +following ``ROW`` type: .. code-block:: sql - ROW(ts BIGINT, status VARCHAR) + ROW(ts BIGINT, status VARCHAR, thread_num BIGINT, backtrace VARCHAR) -Here, the child nodes ``ts`` and ``status`` become fields within the ``ROW``, clearly reflecting the nested structure of -the original JSON. +Each JSON log maps to this unified ``ROW`` type, with absent fields represented as ``NULL``. Thus, the child nodes +(``ts``, ``status``, ``thread_num``, ``backtrace``) become fields within the ``ROW``, clearly reflecting the nested and +varying structures of the original JSON logs. 
SQL support ----------- From 977deadd3344aa27d816f1c108c4e4ec7bd9f418 Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 2 Apr 2025 19:18:57 +0000 Subject: [PATCH 116/126] remove handlePolymorphicType in ClpClient and support original names in ClpFilterToKqlConverter --- .../java/com/yscope/presto/ClpClient.java | 35 +------------------ .../presto/ClpFilterToKqlConverter.java | 7 +--- .../com/yscope/presto/ClpSplitSource.java | 4 +++ .../yscope/presto/metadata/ClpSchemaTree.java | 6 ++-- 4 files changed, 10 insertions(+), 42 deletions(-) create mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index 8b6821f84af50..b975fb8b6c0e2 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -38,7 +38,6 @@ public class ClpClient { private static final Logger log = Logger.get(ClpClient.class); - private final ClpConfig config; private final LoadingCache> columnHandleCache; private final LoadingCache> tableNameCache; private final ClpMetadataProvider clpMetadataProvider; @@ -47,7 +46,6 @@ public class ClpClient @Inject public ClpClient(ClpConfig config) { - this.config = requireNonNull(config, "config is null"); if (config.getMetadataSource() == ClpConfig.MetadataSource.MYSQL) { clpMetadataProvider = new ClpMySQLMetadataProvider(config); } @@ -77,11 +75,7 @@ public ClpClient(ClpConfig config) public List loadColumnHandles(SchemaTableName schemaTableName) { - List columnHandles = clpMetadataProvider.listColumnHandles(schemaTableName); - if (!config.isPolymorphicTypeEnabled()) { - return columnHandles; - } - return handlePolymorphicType(columnHandles); + return clpMetadataProvider.listColumnHandles(schemaTableName); } public List loadTableNames(String schemaName) @@ -103,31 +97,4 @@ public List listColumns(SchemaTableName schemaTableName) { 
return columnHandleCache.getUnchecked(schemaTableName); } - - private List handlePolymorphicType(List columnHandles) - { - Map> columnNameToColumnHandles = new HashMap<>(); - List polymorphicColumnHandles = new ArrayList<>(); - - for (ClpColumnHandle columnHandle : columnHandles) { - columnNameToColumnHandles.computeIfAbsent(columnHandle.getColumnName(), k -> new ArrayList<>()) - .add(columnHandle); - } - for (Map.Entry> entry : columnNameToColumnHandles.entrySet()) { - List columnHandleList = entry.getValue(); - if (columnHandleList.size() == 1) { - polymorphicColumnHandles.add(columnHandleList.get(0)); - } - else { - for (ClpColumnHandle columnHandle : columnHandleList) { - polymorphicColumnHandles.add(new ClpColumnHandle( - columnHandle.getColumnName() + "_" + columnHandle.getColumnType().getDisplayName(), - columnHandle.getColumnName(), - columnHandle.getColumnType(), - columnHandle.isNullable())); - } - } - } - return polymorphicColumnHandles; - } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 4a1e4a8044c5d..267263bfa22cc 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -80,12 +80,7 @@ private static String getLiteralString(ConstantExpression literal) private String getVariableName(VariableReferenceExpression variable) { - String variableName = ((ClpColumnHandle) assignments.get(variable)).getColumnName(); - if (variableName.endsWith("_bigint") || variableName.endsWith("_double") || - variableName.endsWith("_varchar") || variableName.endsWith("_boolean")) { - return variableName.substring(0, variableName.lastIndexOf('_')); - } - return variableName; + return ((ClpColumnHandle) assignments.get(variable)).getOriginalColumnName(); } private ClpExpression handleNot(CallExpression node) diff --git 
a/presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java new file mode 100644 index 0000000000000..605f8bedf5728 --- /dev/null +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java @@ -0,0 +1,4 @@ +package com.yscope.presto; + +public class ClpSplitSource { +} diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java index 4256497d43dea..6db88e9bc9d45 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java @@ -37,6 +37,7 @@ public class ClpSchemaTree static class ClpNode { Type type; // Only non-null for leaf nodes + String originalName; Map children = new HashMap<>(); Set conflictingBaseNames = new HashSet<>(); @@ -119,6 +120,7 @@ else if (current.conflictingBaseNames.contains(leafName)) { ClpNode leaf = new ClpNode(); leaf.type = prestoType; + leaf.originalName = leafName; current.children.put(finalLeafName, leaf); } @@ -129,11 +131,11 @@ public List collectColumnHandles() String name = entry.getKey(); ClpNode child = entry.getValue(); if (child.isLeaf()) { - columns.add(new ClpColumnHandle(name, child.type, true)); + columns.add(new ClpColumnHandle(name, child.originalName, child.type, true)); } else { Type rowType = buildRowType(child); - columns.add(new ClpColumnHandle(name, rowType, true)); + columns.add(new ClpColumnHandle(name, child.originalName, rowType, true)); } } return columns; From ea73a1a3b9d66a75d75b9ea3683d620a334ac4bd Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 2 Apr 2025 19:36:12 +0000 Subject: [PATCH 117/126] improve comments --- .../java/com/yscope/presto/ClpExpression.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java index 525a8bb201857..95fca73373834 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java @@ -17,9 +17,21 @@ import java.util.Optional; +/** + * Represents the result of converting a Presto RowExpression into a CLP-compatible KQL query. + * There are three possible cases: + * 1. The entire RowExpression is convertible to KQL: `definition` is set, `remainingExpression` is empty. + * 2. Part of the RowExpression is convertible: the KQL part is stored in `definition`, + * and the remaining untranslatable part is stored in `remainingExpression`. + * 3. None of the expression is convertible: the full RowExpression is stored in `remainingExpression`, + * and `definition` is empty. + */ public class ClpExpression { + // Optional KQL query string representing the fully or partially translatable part of the expression. private final Optional definition; + + // The remaining (non-translatable) portion of the RowExpression, if any. private final Optional remainingExpression; public ClpExpression(Optional definition, Optional remainingExpression) @@ -28,16 +40,19 @@ public ClpExpression(Optional definition, Optional remain this.remainingExpression = remainingExpression; } + // Creates an empty ClpExpression (no KQL definition, no remaining expression). public ClpExpression() { this (Optional.empty(), Optional.empty()); } + // Creates a ClpExpression from a fully translatable KQL string. public ClpExpression(String definition) { this(Optional.of(definition), Optional.empty()); } + // Creates a ClpExpression from a non-translatable RowExpression. 
public ClpExpression(RowExpression remainingExpression) { this(Optional.empty(), Optional.of(remainingExpression)); From 7bd75ee9da279c5313bd460cde775f4e514bd37c Mon Sep 17 00:00:00 2001 From: wraymo Date: Wed, 2 Apr 2025 19:52:33 +0000 Subject: [PATCH 118/126] rename splitsource and metadatasource --- .../java/com/yscope/presto/ClpClient.java | 15 ++++----- .../java/com/yscope/presto/ClpConfig.java | 32 ++++++++----------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java index b975fb8b6c0e2..cf2a482487449 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java @@ -26,10 +26,7 @@ import javax.inject.Inject; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; @@ -46,20 +43,20 @@ public class ClpClient @Inject public ClpClient(ClpConfig config) { - if (config.getMetadataSource() == ClpConfig.MetadataSource.MYSQL) { + if (config.getMetadataProviderType() == ClpConfig.MetadataProviderType.MYSQL) { clpMetadataProvider = new ClpMySQLMetadataProvider(config); } else { - log.error("Unsupported metadata source: %s", config.getMetadataSource()); - throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE, "Unsupported metadata source: " + config.getMetadataSource()); + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE, + "Unsupported metadata provider type: " + config.getMetadataProviderType()); } - if (config.getSplitSource() == ClpConfig.SplitSource.MYSQL) { + if (config.getSplitProviderType() == ClpConfig.SplitProviderType.MYSQL) { clpSplitProvider = new ClpMySQLSplitProvider(config); } else { - log.error("Unsupported split source: %s", config.getSplitSource()); - throw new 
PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, "Unsupported split source: " + config.getSplitSource()); + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, + "Unsupported split provider type: " + config.getSplitProviderType()); } this.columnHandleCache = CacheBuilder.newBuilder() diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java index 4c72549ed6aa9..810b2430f1504 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java @@ -20,27 +20,24 @@ public class ClpConfig { - // TODO(Rui): We also need to change it in Velox and in the example configuration files public enum ArchiveSource { LOCAL, S3 } - public enum MetadataSource + public enum MetadataProviderType { MYSQL } - // TODO(Rui): come up with a better name - public enum SplitSource + public enum SplitProviderType { MYSQL } private boolean polymorphicTypeEnabled = true; - private MetadataSource metadataSource = MetadataSource.MYSQL; - // TODO(Rui): We need to change it in the example configuration files and in Velox + private MetadataProviderType metadataProviderType = MetadataProviderType.MYSQL; private String metadataDbUrl; private String metadataDbName; private String metadataDbUser; @@ -49,8 +46,7 @@ public enum SplitSource private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; private ArchiveSource archiveSource = ArchiveSource.LOCAL; - // TODO(Rui): We need to add it in the example configuration files and in Velox - private SplitSource splitSource = SplitSource.MYSQL; + private SplitProviderType splitProviderType = SplitProviderType.MYSQL; public static final Pattern SAFE_SQL_IDENTIFIER = Pattern.compile("^[a-zA-Z0-9_]+$"); @@ -66,15 +62,15 @@ public ClpConfig setPolymorphicTypeEnabled(boolean polymorphicTypeEnabled) return this; } - public MetadataSource getMetadataSource() + public 
MetadataProviderType getMetadataProviderType() { - return metadataSource; + return metadataProviderType; } - @Config("clp.metadata-source") - public ClpConfig setMetadataSource(MetadataSource metadataSource) + @Config("clp.metadata-provider-type") + public ClpConfig setMetadataProviderType(MetadataProviderType metadataProviderType) { - this.metadataSource = metadataSource; + this.metadataProviderType = metadataProviderType; return this; } @@ -179,15 +175,15 @@ public ClpConfig setInputSource(ArchiveSource archiveSource) return this; } - public SplitSource getSplitSource() + public SplitProviderType getSplitProviderType() { - return splitSource; + return splitProviderType; } - @Config("clp.split-source") - public ClpConfig setSplitSource(SplitSource splitSource) + @Config("clp.split-provider-type") + public ClpConfig setSplitProviderType(SplitProviderType splitProviderType) { - this.splitSource = splitSource; + this.splitProviderType = splitProviderType; return this; } } From f5ca7f574288999614ca90ea1b66048714939c94 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 3 Apr 2025 13:58:52 +0000 Subject: [PATCH 119/126] address review comments --- .../java/com/yscope/presto/ClpClient.java | 97 ------------------- .../presto/ClpFilterToKqlConverter.java | 5 +- .../java/com/yscope/presto/ClpMetadata.java | 50 ++++++++-- .../java/com/yscope/presto/ClpModule.java | 29 +++++- .../com/yscope/presto/ClpPlanOptimizer.java | 2 +- .../com/yscope/presto/ClpSplitManager.java | 9 +- .../com/yscope/presto/ClpSplitSource.java | 4 - .../com/yscope/presto/TestClpMetadata.java | 5 +- .../yscope/presto/TestClpPlanOptimizer.java | 4 +- .../java/com/yscope/presto/TestClpSplit.java | 10 +- 10 files changed, 89 insertions(+), 126 deletions(-) delete mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpClient.java delete mode 100644 presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java 
b/presto-clp/src/main/java/com/yscope/presto/ClpClient.java deleted file mode 100644 index cf2a482487449..0000000000000 --- a/presto-clp/src/main/java/com/yscope/presto/ClpClient.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.yscope.presto; - -import com.facebook.airlift.log.Logger; -import com.facebook.presto.spi.PrestoException; -import com.facebook.presto.spi.SchemaTableName; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.yscope.presto.metadata.ClpMetadataProvider; -import com.yscope.presto.metadata.ClpMySQLMetadataProvider; -import com.yscope.presto.split.ClpMySQLSplitProvider; -import com.yscope.presto.split.ClpSplitProvider; - -import javax.inject.Inject; - -import java.util.List; - -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.SECONDS; - -public class ClpClient -{ - private static final Logger log = Logger.get(ClpClient.class); - - private final LoadingCache> columnHandleCache; - private final LoadingCache> tableNameCache; - private final ClpMetadataProvider clpMetadataProvider; - private final ClpSplitProvider clpSplitProvider; - - @Inject - public ClpClient(ClpConfig config) - { - if (config.getMetadataProviderType() == ClpConfig.MetadataProviderType.MYSQL) { - clpMetadataProvider = new ClpMySQLMetadataProvider(config); - } - else { - throw new 
PrestoException(ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE, - "Unsupported metadata provider type: " + config.getMetadataProviderType()); - } - - if (config.getSplitProviderType() == ClpConfig.SplitProviderType.MYSQL) { - clpSplitProvider = new ClpMySQLSplitProvider(config); - } - else { - throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, - "Unsupported split provider type: " + config.getSplitProviderType()); - } - - this.columnHandleCache = CacheBuilder.newBuilder() - .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) - .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) - .build(CacheLoader.from(this::loadColumnHandles)); - - this.tableNameCache = CacheBuilder.newBuilder() - .expireAfterWrite(config.getMetadataExpireInterval(), SECONDS) - .refreshAfterWrite(config.getMetadataRefreshInterval(), SECONDS) - .build(CacheLoader.from(this::loadTableNames)); - } - - public List loadColumnHandles(SchemaTableName schemaTableName) - { - return clpMetadataProvider.listColumnHandles(schemaTableName); - } - - public List loadTableNames(String schemaName) - { - return clpMetadataProvider.listTableNames(schemaName); - } - - public List listTables(String schemaName) - { - return tableNameCache.getUnchecked(schemaName); - } - - public List listSplits(ClpTableLayoutHandle layoutHandle) - { - return clpSplitProvider.listSplits(layoutHandle); - } - - public List listColumns(SchemaTableName schemaTableName) - { - return columnHandleCache.getUnchecked(schemaTableName); - } -} diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java index 267263bfa22cc..dd503a8372920 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java @@ -49,7 +49,6 @@ import static com.yscope.presto.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static 
java.util.Objects.requireNonNull; -// TODO(Rui): Correctly handle escaping of special characters in LIKE expressions (LIKE 'a%b' ESCAPE 'a') public class ClpFilterToKqlConverter implements RowExpressionVisitor { @@ -252,7 +251,7 @@ private ClpExpression handleDereference(SpecialFormExpression expression) return handleDeferenceImpl(expression); } - // Only handles the case where there is a SQL wildcard in the middle of the string + // It currently only handles the case where there is a SQL wildcard in the middle of the string private ClpExpression handleLike(CallExpression node) { if (node.getArguments().size() != 2) { @@ -554,7 +553,7 @@ public ClpExpression visitCall(CallExpression node, Void context) Optional operatorTypeOptional = functionMetadata.getOperatorType(); if (operatorTypeOptional.isPresent()) { OperatorType operatorType = operatorTypeOptional.get(); - if (operatorType.isComparisonOperator() || operatorType != OperatorType.IS_DISTINCT_FROM) { + if (operatorType.isComparisonOperator() && operatorType != OperatorType.IS_DISTINCT_FROM) { return handleLogicalBinary(operatorType, node); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java index 41928791aa786..254bb8c9e17de 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java @@ -25,8 +25,12 @@ import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.SchemaTablePrefix; import com.facebook.presto.spi.connector.ConnectorMetadata; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.yscope.presto.metadata.ClpMetadataProvider; import javax.inject.Inject; @@ -37,17 +41,49 @@ import java.util.function.Function; import static 
java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.SECONDS; public class ClpMetadata implements ConnectorMetadata { - private final ClpClient clpClient; private static final String DEFAULT_SCHEMA_NAME = "default"; + private final ClpMetadataProvider clpMetadataProvider; + private final LoadingCache> columnHandleCache; + private final LoadingCache> tableNameCache; @Inject - public ClpMetadata(ClpClient clpClient) + public ClpMetadata(ClpConfig clpConfig, ClpMetadataProvider clpMetadataProvider) { - this.clpClient = clpClient; + this.columnHandleCache = CacheBuilder.newBuilder() + .expireAfterWrite(clpConfig.getMetadataExpireInterval(), SECONDS) + .refreshAfterWrite(clpConfig.getMetadataRefreshInterval(), SECONDS) + .build(CacheLoader.from(this::loadColumnHandles)); + this.tableNameCache = CacheBuilder.newBuilder() + .expireAfterWrite(clpConfig.getMetadataExpireInterval(), SECONDS) + .refreshAfterWrite(clpConfig.getMetadataRefreshInterval(), SECONDS) + .build(CacheLoader.from(this::loadTableNames)); + + this.clpMetadataProvider = clpMetadataProvider; + } + + private List loadColumnHandles(SchemaTableName schemaTableName) + { + return clpMetadataProvider.listColumnHandles(schemaTableName); + } + + private List loadTableNames(String schemaName) + { + return clpMetadataProvider.listTableNames(schemaName); + } + + private List listTables(String schemaName) + { + return tableNameCache.getUnchecked(schemaName); + } + + private List listColumns(SchemaTableName schemaTableName) + { + return columnHandleCache.getUnchecked(schemaTableName); } @Override @@ -64,7 +100,7 @@ public List listTables(ConnectorSession session, Optional new SchemaTableName(schemaNameValue, tableName)) .collect(ImmutableList.toImmutableList()); } @@ -77,7 +113,7 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable return null; } - if (!clpClient.listTables(schemaName).contains(tableName.getTableName())) { + if 
(!listTables(schemaName).contains(tableName.getTableName())) { return null; } @@ -106,7 +142,7 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect { ClpTableHandle clpTableHandle = (ClpTableHandle) table; SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); - List columns = clpClient.listColumns(schemaTableName).stream() + List columns = listColumns(schemaTableName).stream() .map(ClpColumnHandle::getColumnMetadata) .collect(ImmutableList.toImmutableList()); @@ -141,7 +177,7 @@ public Map> listTableColumns(ConnectorSess public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle; - return clpClient.listColumns(clpTableHandle.getSchemaTableName()).stream() + return listColumns(clpTableHandle.getSchemaTableName()).stream() .collect(ImmutableMap.toImmutableMap( ClpColumnHandle::getColumnName, column -> column)); diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java index 311a9b259e01b..e2f3b4fb38d8f 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpModule.java @@ -13,22 +13,43 @@ */ package com.yscope.presto; +import com.facebook.airlift.configuration.AbstractConfigurationAwareModule; +import com.facebook.presto.spi.PrestoException; import com.google.inject.Binder; -import com.google.inject.Module; import com.google.inject.Scopes; +import com.yscope.presto.metadata.ClpMetadataProvider; +import com.yscope.presto.metadata.ClpMySQLMetadataProvider; +import com.yscope.presto.split.ClpMySQLSplitProvider; +import com.yscope.presto.split.ClpSplitProvider; import static com.facebook.airlift.configuration.ConfigBinder.configBinder; public class ClpModule - implements Module + extends AbstractConfigurationAwareModule { @Override - public void configure(Binder binder) + protected void 
setup(Binder binder) { binder.bind(ClpConnector.class).in(Scopes.SINGLETON); binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); - binder.bind(ClpClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); + + ClpConfig config = buildConfigObject(ClpConfig.class); + if (config.getMetadataProviderType() == ClpConfig.MetadataProviderType.MYSQL) { + binder.bind(ClpMetadataProvider.class).to(ClpMySQLMetadataProvider.class).in(Scopes.SINGLETON); + } + else { + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE, + "Unsupported metadata provider type: " + config.getMetadataProviderType()); + } + + if (config.getSplitProviderType() == ClpConfig.SplitProviderType.MYSQL) { + binder.bind(ClpSplitProvider.class).to(ClpMySQLSplitProvider.class).in(Scopes.SINGLETON); + } + else { + throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, + "Unsupported split provider type: " + config.getSplitProviderType()); + } } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java index 6ffb227d9d4ad..df47917dbf49a 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java @@ -86,7 +86,7 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) if (!kqlQuery.isPresent()) { return node; } - log.info("KQL query: %s", kqlQuery.get()); + log.debug("KQL query: %s", kqlQuery.get()); ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery); TableScanNode newTableScanNode = new TableScanNode( tableScanNode.getSourceLocation(), diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java index 23aa5a79dc7b6..69178c83741f3 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java @@ -19,18 +19,19 @@ import com.facebook.presto.spi.FixedSplitSource; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.yscope.presto.split.ClpSplitProvider; import javax.inject.Inject; public class ClpSplitManager implements ConnectorSplitManager { - private final ClpClient clpClient; + private final ClpSplitProvider clpSplitProvider; @Inject - public ClpSplitManager(ClpClient clpClient) + public ClpSplitManager(ClpSplitProvider clpSplitProvider) { - this.clpClient = clpClient; + this.clpSplitProvider = clpSplitProvider; } @Override @@ -40,6 +41,6 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHand SplitSchedulingContext splitSchedulingContext) { ClpTableLayoutHandle layoutHandle = (ClpTableLayoutHandle) layout; - return new FixedSplitSource(clpClient.listSplits(layoutHandle)); + return new FixedSplitSource(clpSplitProvider.listSplits(layoutHandle)); } } diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java b/presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java deleted file mode 100644 index 605f8bedf5728..0000000000000 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitSource.java +++ /dev/null @@ -1,4 +0,0 @@ -package com.yscope.presto; - -public class ClpSplitSource { -} diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java index fc2dfc58ff118..d96d37e2daf9a 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java @@ -23,6 +23,8 @@ import com.facebook.presto.spi.ConnectorTableMetadata; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableList; +import 
com.yscope.presto.metadata.ClpMetadataProvider; +import com.yscope.presto.metadata.ClpMySQLMetadataProvider; import com.yscope.presto.metadata.ClpNodeType; import org.apache.commons.math3.util.Pair; import org.testng.annotations.AfterMethod; @@ -67,7 +69,8 @@ public void setUp() .setMetadataDbUser("sa") .setMetadataDbPassword("") .setMetadataTablePrefix(metadataDbTablePrefix); - metadata = new ClpMetadata(new ClpClient(config)); + ClpMetadataProvider metadataProvider = new ClpMySQLMetadataProvider(config); + metadata = new ClpMetadata(config, metadataProvider); final String tableMetadataTableName = metadataDbTablePrefix + tableMetadataSuffix; final String columnMetadataTableName = metadataDbTablePrefix + columnMetadataTablePrefix + TABLE_NAME; diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java index 18539f6fc9ae6..db1027d9d17ad 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java @@ -170,6 +170,7 @@ public void testNotPushdown() // Multiple NOTs testFilter("NOT (NOT fare = 0)", Optional.of("NOT NOT fare: 0"), Optional.empty(), sessionHolder); testFilter("NOT (fare = 0 AND city.Name = 'hello world')", Optional.of("NOT (fare: 0 AND city.Name: \"hello world\")"), Optional.empty(), sessionHolder); + testFilter("NOT (fare = 0 OR city.Name = 'hello world')", Optional.of("NOT (fare: 0 OR city.Name: \"hello world\")"), Optional.empty(), sessionHolder); } @Test @@ -187,6 +188,7 @@ public void testIsNullPushdown() testFilter("city.Name IS NULL", Optional.of("NOT city.Name: *"), Optional.empty(), sessionHolder); testFilter("city.Name IS NOT NULL", Optional.of("NOT NOT city.Name: *"), Optional.empty(), sessionHolder); + testFilter("NOT (city.Name IS NULL)", Optional.of("NOT NOT city.Name: *"), Optional.empty(), sessionHolder); } @Test @@ -198,7 +200,7 @@ public void 
testComplexPushdown() Optional.of("((fare > 0 OR city.Name: \"b*\"))"), Optional.of("(lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)"), sessionHolder); - testFilter("city.Region.Id = 1 AND (fare > 0 OR city.Name not like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", + testFilter("city.Region.Id = 1 AND (fare > 0 OR city.Name NOT like 'b%') AND (lower(city.Region.Name) = 'hello world' OR city.Name IS NULL)", Optional.of("((city.Region.Id: 1 AND (fare > 0 OR NOT city.Name: \"b*\")))"), Optional.of("lower(city.Region.Name) = 'hello world' OR city.Name IS NULL"), sessionHolder); diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java b/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java index d1e1d886965c8..8c13656b5ce20 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java @@ -14,6 +14,8 @@ package com.yscope.presto; import com.facebook.presto.spi.SchemaTableName; +import com.yscope.presto.split.ClpMySQLSplitProvider; +import com.yscope.presto.split.ClpSplitProvider; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -34,7 +36,7 @@ @Test(singleThreaded = true) public class TestClpSplit { - ClpClient client; + private ClpConfig config; private static final String TABLE_NAME_1 = "test_1"; private static final String TABLE_NAME_2 = "test_2"; private static final String TABLE_NAME_3 = "test_3"; @@ -52,12 +54,11 @@ public void setUp() final String tableMetadataSuffix = "table_metadata"; final String archiveTableSuffix = "_archives"; - ClpConfig config = new ClpConfig().setPolymorphicTypeEnabled(true) + this.config = new ClpConfig().setPolymorphicTypeEnabled(true) .setMetadataDbUrl(metadataDbUrl) .setMetadataDbUser("sa") .setMetadataDbPassword("") .setMetadataTablePrefix(metadataDbTablePrefix); - client = new ClpClient(config); final String 
tableMetadataTableName = metadataDbTablePrefix + tableMetadataSuffix; final String archiveTableFormat = metadataDbTablePrefix + "%s" + archiveTableSuffix; @@ -120,9 +121,10 @@ public void tearDown() @Test public void testListSplits() { + ClpSplitProvider splitProvider = new ClpMySQLSplitProvider(config); for (String tableName : TABLE_NAME_LIST) { ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(new ClpTableHandle(new SchemaTableName(TABLE_SCHEMA, tableName)), Optional.empty()); - List splits = client.listSplits(layoutHandle); + List splits = splitProvider.listSplits(layoutHandle); assertEquals(splits.size(), NUM_SPLITS); for (int i = 0; i < NUM_SPLITS; i++) { assertEquals(splits.get(i).getArchivePath(), "/tmp/archives/" + tableName + "/id_" + i); From 1b63cbc8b83f43d00460d236955ef4eb8567be76 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 3 Apr 2025 14:51:50 +0000 Subject: [PATCH 120/126] use archive_id --- .../com/yscope/presto/split/ClpMySQLSplitProvider.java | 4 ++-- .../src/test/java/com/yscope/presto/TestClpSplit.java | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java index 0331d93aa1d1a..444fd3ee4e4cb 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java +++ b/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java @@ -35,7 +35,7 @@ public class ClpMySQLSplitProvider private static final String ARCHIVE_TABLE_SUFFIX = "_archives"; private static final String TABLE_METADATA_TABLE_SUFFIX = "table_metadata"; - private static final String QUERY_SELECT_ARCHIVE_IDS = "SELECT id FROM %s%s" + ARCHIVE_TABLE_SUFFIX; + private static final String QUERY_SELECT_ARCHIVE_IDS = "SELECT archive_id FROM %s%s" + ARCHIVE_TABLE_SUFFIX; private static final String QUERY_SELECT_TABLE_METADATA = "SELECT table_path FROM %s" + 
TABLE_METADATA_TABLE_SUFFIX + " WHERE table_name = '%s'"; private final ClpConfig config; @@ -93,7 +93,7 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) try (PreparedStatement statement = connection.prepareStatement(archivePathQuery); ResultSet resultSet = statement.executeQuery()) { while (resultSet.next()) { - final String archiveId = resultSet.getString("id"); + final String archiveId = resultSet.getString("archive_id"); final String archivePath = tablePath + "/" + archiveId; splits.add(new ClpSplit(tableSchemaName, archivePath, clpTableLayoutHandle.getQuery())); } diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java b/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java index 8c13656b5ce20..4ad79aafa9d16 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java +++ b/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java @@ -86,10 +86,15 @@ public void setUp() // Create and populate archive tables for (String tableName : TABLE_NAME_LIST) { String archiveTableName = String.format(archiveTableFormat, tableName); - String createArchiveTableSQL = String.format("CREATE TABLE IF NOT EXISTS %s (id VARCHAR(128) PRIMARY KEY)", archiveTableName); + String createArchiveTableSQL = String.format( + "CREATE TABLE IF NOT EXISTS %s (" + + "id BIGINT AUTO_INCREMENT PRIMARY KEY, " + + "archive_id VARCHAR(128) NOT NULL" + + ")", + archiveTableName); stmt.execute(createArchiveTableSQL); - String insertArchiveTableSQL = String.format("INSERT INTO %s (id) VALUES (?)", archiveTableName); + String insertArchiveTableSQL = String.format("INSERT INTO %s (archive_id) VALUES (?)", archiveTableName); try (PreparedStatement pstmt = conn.prepareStatement(insertArchiveTableSQL)) { for (int i = 0; i < NUM_SPLITS; i++) { pstmt.setString(1, "id_" + i); From 37e1b4c08a6911f1a80aee8d2976d824899dd8ac Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 3 Apr 2025 15:03:37 +0000 Subject: [PATCH 121/126] add comments in ClpSchemaTree 
--- .../com/yscope/presto/metadata/ClpSchemaTree.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java index 6db88e9bc9d45..e8c54dafbfc76 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java +++ b/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java @@ -76,6 +76,14 @@ private Type mapColumnType(byte type) } } + /** + * Adds a column to the internal CLP schema tree, creating intermediate nested nodes as needed. + * Handles potential name conflicts when polymorphic types are enabled by suffixing column names + * with type display names. + * + * @param fullName Fully qualified column name using dot notation (e.g., "a.b.c"). + * @param type Serialized byte value representing the CLP column's type. + */ public void addColumn(String fullName, byte type) { Type prestoType = mapColumnType(type); @@ -124,6 +132,13 @@ else if (current.conflictingBaseNames.contains(leafName)) { current.children.put(finalLeafName, leaf); } + /** + * Traverses the CLP schema tree and collects all leaf and nested structure nodes + * into a flat list of column handles. For nested structures, builds a RowType + * from child nodes. + * + * @return List of ClpColumnHandle objects representing the full schema. 
+ */ public List collectColumnHandles() { List columns = new ArrayList<>(); From 8428517956d60d3727b8c3731819a573683b9cfa Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 4 Apr 2025 00:03:01 +0000 Subject: [PATCH 122/126] rename package name and class name --- pom.xml | 2 +- presto-clp/pom.xml | 1 - .../presto/plugin/clp}/ClpColumnHandle.java | 2 +- .../presto/plugin/clp}/ClpConfig.java | 2 +- .../presto/plugin/clp}/ClpConnector.java | 2 +- .../presto/plugin/clp}/ClpConnectorFactory.java | 2 +- .../presto/plugin/clp}/ClpErrorCode.java | 2 +- .../presto/plugin/clp}/ClpExpression.java | 2 +- .../plugin/clp}/ClpFilterToKqlConverter.java | 4 ++-- .../presto/plugin/clp}/ClpHandleResolver.java | 2 +- .../presto/plugin/clp}/ClpMetadata.java | 4 ++-- .../presto/plugin/clp}/ClpModule.java | 14 +++++++------- .../presto/plugin/clp}/ClpPlanOptimizer.java | 2 +- .../plugin/clp}/ClpPlanOptimizerProvider.java | 2 +- .../presto/plugin/clp}/ClpPlugin.java | 2 +- .../presto/plugin/clp}/ClpSplit.java | 2 +- .../presto/plugin/clp}/ClpSplitManager.java | 4 ++-- .../presto/plugin/clp}/ClpTableHandle.java | 2 +- .../presto/plugin/clp}/ClpTableLayoutHandle.java | 2 +- .../presto/plugin/clp}/ClpTransactionHandle.java | 2 +- .../plugin/clp}/metadata/ClpMetadataProvider.java | 4 ++-- .../clp/metadata/ClpMySqlMetadataProvider.java} | 12 ++++++------ .../presto/plugin/clp}/metadata/ClpNodeType.java | 2 +- .../presto/plugin/clp}/metadata/ClpSchemaTree.java | 6 +++--- .../plugin/clp/split/ClpMySqlSplitProvider.java} | 14 +++++++------- .../presto/plugin/clp}/split/ClpSplitProvider.java | 6 +++--- .../presto/plugin/clp}/TestClpMetadata.java | 10 +++++----- .../presto/plugin/clp}/TestClpPlanOptimizer.java | 2 +- .../presto/plugin/clp}/TestClpQueryBase.java | 2 +- .../presto/plugin/clp}/TestClpSplit.java | 8 ++++---- presto-server/src/main/provisio/presto.xml | 2 +- 31 files changed, 62 insertions(+), 63 deletions(-) rename presto-clp/src/main/java/com/{yscope/presto => 
facebook/presto/plugin/clp}/ClpColumnHandle.java (98%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpConfig.java (99%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpConnector.java (98%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpConnectorFactory.java (98%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpErrorCode.java (97%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpExpression.java (98%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpFilterToKqlConverter.java (99%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpHandleResolver.java (97%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpMetadata.java (98%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpModule.java (81%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpPlanOptimizer.java (99%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpPlanOptimizerProvider.java (97%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpPlugin.java (95%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpSplit.java (98%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpSplitManager.java (94%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpTableHandle.java (97%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpTableLayoutHandle.java (97%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/ClpTransactionHandle.java (94%) rename presto-clp/src/main/java/com/{yscope/presto => 
facebook/presto/plugin/clp}/metadata/ClpMetadataProvider.java (88%) rename presto-clp/src/main/java/com/{yscope/presto/metadata/ClpMySQLMetadataProvider.java => facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java} (92%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/metadata/ClpNodeType.java (97%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/metadata/ClpSchemaTree.java (97%) rename presto-clp/src/main/java/com/{yscope/presto/split/ClpMySQLSplitProvider.java => facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java} (92%) rename presto-clp/src/main/java/com/{yscope/presto => facebook/presto/plugin/clp}/split/ClpSplitProvider.java (81%) rename presto-clp/src/test/java/com/{yscope/presto => facebook/presto/plugin/clp}/TestClpMetadata.java (96%) rename presto-clp/src/test/java/com/{yscope/presto => facebook/presto/plugin/clp}/TestClpPlanOptimizer.java (99%) rename presto-clp/src/test/java/com/{yscope/presto => facebook/presto/plugin/clp}/TestClpQueryBase.java (99%) rename presto-clp/src/test/java/com/{yscope/presto => facebook/presto/plugin/clp}/TestClpSplit.java (96%) diff --git a/pom.xml b/pom.xml index ca79bb2a54b70..980a063f688e8 100644 --- a/pom.xml +++ b/pom.xml @@ -763,7 +763,7 @@ - com.yscope.presto + com.facebook.presto presto-clp ${project.version} diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 63abb7bc987ec..5682de9be673b 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -9,7 +9,6 @@ 0.292-SNAPSHOT - com.yscope.presto presto-clp Presto - CLP Connector presto-plugin diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpColumnHandle.java similarity index 98% rename from presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpColumnHandle.java index 4de90d6b7e059..98a05bf15ac7c 100644 --- 
a/presto-clp/src/main/java/com/yscope/presto/ClpColumnHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpColumnHandle.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.ColumnHandle; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java similarity index 99% rename from presto-clp/src/main/java/com/yscope/presto/ClpConfig.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 810b2430f1504..641bd5860e984 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.airlift.configuration.Config; import com.facebook.presto.spi.PrestoException; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java similarity index 98% rename from presto-clp/src/main/java/com/yscope/presto/ClpConnector.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java index b081abe7eb2e4..fe4f66df99516 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnector.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java similarity index 98% rename from presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java index 5b9e003b093c7..a984943df87b7 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpConnectorFactory.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.airlift.bootstrap.Bootstrap; import com.facebook.airlift.json.JsonModule; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java index 52ef0675fecd4..8cb2438277404 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.common.ErrorCode; import com.facebook.presto.common.ErrorType; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java similarity index 98% rename from presto-clp/src/main/java/com/yscope/presto/ClpExpression.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java index 95fca73373834..df32727bcf27b 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpExpression.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.relation.RowExpression; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java similarity index 99% rename from presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java index dd503a8372920..f3bc9a8adda6e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.common.function.OperatorType; import com.facebook.presto.common.type.RowType; @@ -45,8 +45,8 @@ import static com.facebook.presto.common.function.OperatorType.LESS_THAN_OR_EQUAL; import static com.facebook.presto.common.function.OperatorType.NOT_EQUAL; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; -import static com.yscope.presto.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static java.util.Objects.requireNonNull; public class ClpFilterToKqlConverter diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpHandleResolver.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpHandleResolver.java index c281ba3230e90..462ecc039b9c6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpHandleResolver.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpHandleResolver.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorHandleResolver; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java similarity index 98% rename from presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java index 254bb8c9e17de..1172d278b63be 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpMetadata.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java @@ -11,8 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorSession; @@ -30,7 +31,6 @@ import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.yscope.presto.metadata.ClpMetadataProvider; import javax.inject.Inject; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java similarity index 81% rename from presto-clp/src/main/java/com/yscope/presto/ClpModule.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java index e2f3b4fb38d8f..da8db4d83efa4 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpModule.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.airlift.configuration.AbstractConfigurationAwareModule; +import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; +import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; +import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitProvider; import com.facebook.presto.spi.PrestoException; import com.google.inject.Binder; import com.google.inject.Scopes; -import com.yscope.presto.metadata.ClpMetadataProvider; -import com.yscope.presto.metadata.ClpMySQLMetadataProvider; -import com.yscope.presto.split.ClpMySQLSplitProvider; -import com.yscope.presto.split.ClpSplitProvider; import static com.facebook.airlift.configuration.ConfigBinder.configBinder; @@ -37,7 +37,7 @@ protected void setup(Binder binder) ClpConfig config = buildConfigObject(ClpConfig.class); if (config.getMetadataProviderType() == ClpConfig.MetadataProviderType.MYSQL) { - binder.bind(ClpMetadataProvider.class).to(ClpMySQLMetadataProvider.class).in(Scopes.SINGLETON); + binder.bind(ClpMetadataProvider.class).to(ClpMySqlMetadataProvider.class).in(Scopes.SINGLETON); } else { throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE, @@ -45,7 +45,7 @@ protected void setup(Binder binder) } if (config.getSplitProviderType() == ClpConfig.SplitProviderType.MYSQL) { - binder.bind(ClpSplitProvider.class).to(ClpMySQLSplitProvider.class).in(Scopes.SINGLETON); + binder.bind(ClpSplitProvider.class).to(ClpMySqlSplitProvider.class).in(Scopes.SINGLETON); } else { throw new PrestoException(ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE, diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java similarity index 99% rename from presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java rename to 
presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java index df47917dbf49a..8703f538a0661 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizer.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizer.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.airlift.log.Logger; import com.facebook.presto.spi.ColumnHandle; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java index 5b204fe46569a..2268ce26c238e 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlanOptimizerProvider.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlugin.java similarity index 95% rename from presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlugin.java index 97d86dc0c2cba..985c707a32483 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpPlugin.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpPlugin.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.Plugin; import com.facebook.presto.spi.connector.ConnectorFactory; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java similarity index 98% rename from presto-clp/src/main/java/com/yscope/presto/ClpSplit.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java index 2c4ba9b89434c..680a5c672ff87 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplit.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ConnectorSplit; import com.facebook.presto.spi.HostAddress; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplitManager.java similarity index 94% rename from presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplitManager.java index 69178c83741f3..07c77036bf405 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpSplitManager.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplitManager.java @@ -11,15 +11,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.split.ClpSplitProvider; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.ConnectorSplitSource; import com.facebook.presto.spi.ConnectorTableLayoutHandle; import com.facebook.presto.spi.FixedSplitSource; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; -import com.yscope.presto.split.ClpSplitProvider; import javax.inject.Inject; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableHandle.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableHandle.java index 473b734e100ea..fb6dc88bbb584 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableHandle.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * 
limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ConnectorTableHandle; import com.facebook.presto.spi.SchemaTableName; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java index 77ae589cdbf6c..d524101bed863 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ConnectorTableLayoutHandle; import com.fasterxml.jackson.annotation.JsonCreator; diff --git a/presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTransactionHandle.java similarity index 94% rename from presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTransactionHandle.java index 3b7b47e55bc06..f39cd639072d6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/ClpTransactionHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTransactionHandle.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java similarity index 88% rename from presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java index c400dc8864d5d..4f125e601bc03 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMetadataProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java @@ -11,10 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto.metadata; +package com.facebook.presto.plugin.clp.metadata; +import com.facebook.presto.plugin.clp.ClpColumnHandle; import com.facebook.presto.spi.SchemaTableName; -import com.yscope.presto.ClpColumnHandle; import java.util.List; diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java similarity index 92% rename from presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java index 9c1b75ffd66a8..4644568d1d300 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpMySQLMetadataProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto.metadata; +package com.facebook.presto.plugin.clp.metadata; import com.facebook.airlift.log.Logger; +import com.facebook.presto.plugin.clp.ClpColumnHandle; +import com.facebook.presto.plugin.clp.ClpConfig; import com.facebook.presto.spi.SchemaTableName; -import com.yscope.presto.ClpColumnHandle; -import com.yscope.presto.ClpConfig; import java.sql.Connection; import java.sql.DriverManager; @@ -27,10 +27,10 @@ import java.util.ArrayList; import java.util.List; -public class ClpMySQLMetadataProvider +public class ClpMySqlMetadataProvider implements ClpMetadataProvider { - private static final Logger log = Logger.get(ClpMySQLMetadataProvider.class); + private static final Logger log = Logger.get(ClpMySqlMetadataProvider.class); public static final String COLUMN_METADATA_PREFIX = "column_metadata_"; private static final String QUERY_SELECT_COLUMNS = "SELECT * FROM %s" + COLUMN_METADATA_PREFIX + "%s"; @@ -39,7 +39,7 @@ public class ClpMySQLMetadataProvider private final ClpConfig config; - public ClpMySQLMetadataProvider(ClpConfig config) + public ClpMySqlMetadataProvider(ClpConfig config) { try { Class.forName("com.mysql.jdbc.Driver"); diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpNodeType.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpNodeType.java index 32e95111375cd..a2b30bde98c98 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpNodeType.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpNodeType.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto.metadata; +package com.facebook.presto.plugin.clp.metadata; public enum ClpNodeType { diff --git a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java similarity index 97% rename from presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java index e8c54dafbfc76..9d73cfa8b4513 100644 --- a/presto-clp/src/main/java/com/yscope/presto/metadata/ClpSchemaTree.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpSchemaTree.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto.metadata; +package com.facebook.presto.plugin.clp.metadata; import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.BigintType; @@ -20,9 +20,9 @@ import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.plugin.clp.ClpColumnHandle; +import com.facebook.presto.plugin.clp.ClpErrorCode; import com.facebook.presto.spi.PrestoException; -import com.yscope.presto.ClpColumnHandle; -import com.yscope.presto.ClpErrorCode; import java.util.ArrayList; import java.util.HashMap; diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java similarity index 92% rename from presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 444fd3ee4e4cb..c2e422281d607 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpMySQLSplitProvider.java +++ 
b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -11,14 +11,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto.split; +package com.facebook.presto.plugin.clp.split; import com.facebook.airlift.log.Logger; +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.ClpSplit; +import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableList; -import com.yscope.presto.ClpConfig; -import com.yscope.presto.ClpSplit; -import com.yscope.presto.ClpTableLayoutHandle; import java.sql.Connection; import java.sql.DriverManager; @@ -28,10 +28,10 @@ import java.util.ArrayList; import java.util.List; -public class ClpMySQLSplitProvider +public class ClpMySqlSplitProvider implements ClpSplitProvider { - private static final Logger log = Logger.get(ClpMySQLSplitProvider.class); + private static final Logger log = Logger.get(ClpMySqlSplitProvider.class); private static final String ARCHIVE_TABLE_SUFFIX = "_archives"; private static final String TABLE_METADATA_TABLE_SUFFIX = "table_metadata"; @@ -40,7 +40,7 @@ public class ClpMySQLSplitProvider private final ClpConfig config; - public ClpMySQLSplitProvider(ClpConfig config) + public ClpMySqlSplitProvider(ClpConfig config) { try { Class.forName("com.mysql.jdbc.Driver"); diff --git a/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java similarity index 81% rename from presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java index 394446136f2b7..17cd6dc81d7f6 100644 --- a/presto-clp/src/main/java/com/yscope/presto/split/ClpSplitProvider.java +++ 
b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java @@ -11,10 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto.split; +package com.facebook.presto.plugin.clp.split; -import com.yscope.presto.ClpSplit; -import com.yscope.presto.ClpTableLayoutHandle; +import com.facebook.presto.plugin.clp.ClpSplit; +import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; import java.util.List; diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java similarity index 96% rename from presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java index d96d37e2daf9a..f565417e9007e 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.BigintType; @@ -19,13 +19,13 @@ import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; +import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; +import com.facebook.presto.plugin.clp.metadata.ClpNodeType; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorTableMetadata; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableList; -import com.yscope.presto.metadata.ClpMetadataProvider; -import com.yscope.presto.metadata.ClpMySQLMetadataProvider; -import com.yscope.presto.metadata.ClpNodeType; import org.apache.commons.math3.util.Pair; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -69,7 +69,7 @@ public void setUp() .setMetadataDbUser("sa") .setMetadataDbPassword("") .setMetadataTablePrefix(metadataDbTablePrefix); - ClpMetadataProvider metadataProvider = new ClpMySQLMetadataProvider(config); + ClpMetadataProvider metadataProvider = new ClpMySqlMetadataProvider(config); metadata = new ClpMetadata(config, metadataProvider); final String tableMetadataTableName = metadataDbTablePrefix + tableMetadataSuffix; diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpPlanOptimizer.java similarity index 99% rename from presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpPlanOptimizer.java index db1027d9d17ad..66d47a2e2805d 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpPlanOptimizer.java +++ 
b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpPlanOptimizer.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.relation.RowExpression; import org.testng.annotations.Test; diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java similarity index 99% rename from presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java index 8a73c0788a263..56b6a5bbb78b7 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; import com.facebook.presto.Session; import com.facebook.presto.SystemSessionProperties; diff --git a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java similarity index 96% rename from presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java index 4ad79aafa9d16..61024e81a2b9f 100644 --- a/presto-clp/src/test/java/com/yscope/presto/TestClpSplit.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.yscope.presto; +package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitProvider; import com.facebook.presto.spi.SchemaTableName; -import com.yscope.presto.split.ClpMySQLSplitProvider; -import com.yscope.presto.split.ClpSplitProvider; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -126,7 +126,7 @@ public void tearDown() @Test public void testListSplits() { - ClpSplitProvider splitProvider = new ClpMySQLSplitProvider(config); + ClpSplitProvider splitProvider = new ClpMySqlSplitProvider(config); for (String tableName : TABLE_NAME_LIST) { ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(new ClpTableHandle(new SchemaTableName(TABLE_SCHEMA, tableName)), Optional.empty()); List splits = splitProvider.listSplits(layoutHandle); diff --git a/presto-server/src/main/provisio/presto.xml b/presto-server/src/main/provisio/presto.xml index 06f7caf37d093..6089d65f8b1e8 100644 --- a/presto-server/src/main/provisio/presto.xml +++ b/presto-server/src/main/provisio/presto.xml @@ -231,7 +231,7 @@ - + From 5771f1d5e93cabda62a2b3b03bfd5cb3fc6ffee7 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 4 Apr 2025 00:18:11 +0000 Subject: [PATCH 123/126] add comments in ClpFilterToKqlConverter.java --- .../presto/plugin/clp/ClpFilterToKqlConverter.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java index f3bc9a8adda6e..0f39a8f924f88 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java @@ -251,7 +251,6 @@ private ClpExpression 
handleDereference(SpecialFormExpression expression) return handleDeferenceImpl(expression); } - // It currently only handles the case where there is a SQL wildcard in the middle of the string private ClpExpression handleLike(CallExpression node) { if (node.getArguments().size() != 2) { @@ -431,6 +430,11 @@ private ClpExpression interpretSubstringEquality(SubstrInfo info, String targetS return new ClpExpression(Optional.empty(), Optional.empty()); } + + /** + * Checks whether the given expression matches the pattern SUBSTR(x, ...) = 'someString', + * and if so, attempts to convert it into a KQL query using wildcards and construct a CLP expression. + */ private ClpExpression tryInterpretSubstringEquality( OperatorType operator, RowExpression possibleSubstring, @@ -454,6 +458,11 @@ private ClpExpression tryInterpretSubstringEquality( return interpretSubstringEquality(maybeSubstringCall.get(), targetString); } + /** + * Builds a CLP expression from a basic comparison between a variable and a literal. + * Handles different operator types (EQUAL, NOT_EQUAL, and logical binary ops like <, >, etc.) + * and formats them appropriately based on whether the literal is a string or a non-string type. 
+ */ private ClpExpression buildClpExpression( String variableName, String literalString, From 4f042a23abb3356e7ca4a275c8c7c6aa6efe5e0d Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 4 Apr 2025 00:28:16 +0000 Subject: [PATCH 124/126] fix typo --- presto-server/src/main/provisio/presto.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-server/src/main/provisio/presto.xml b/presto-server/src/main/provisio/presto.xml index 6089d65f8b1e8..0c88f798c727b 100644 --- a/presto-server/src/main/provisio/presto.xml +++ b/presto-server/src/main/provisio/presto.xml @@ -231,7 +231,7 @@ - + From 943d5024a9800ad87cb5d5cf0c5543436686e9a3 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 4 Apr 2025 01:00:25 +0000 Subject: [PATCH 125/126] add comments --- .../plugin/clp/ClpFilterToKqlConverter.java | 228 ++++++++++++------ .../clp/metadata/ClpMetadataProvider.java | 6 + .../plugin/clp/split/ClpSplitProvider.java | 3 + 3 files changed, 162 insertions(+), 75 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java index 0f39a8f924f88..beae6ca3819c8 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java @@ -49,6 +49,22 @@ import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; import static java.util.Objects.requireNonNull; +/** + * ClpFilterToKqlConverter translates Presto RowExpressions into KQL (Kibana Query Language) filters + * used as CLP queries. This is used primarily for pushing down supported filters to the CLP engine. 
+ * This class implements the RowExpressionVisitor interface and recursively walks Presto filter expressions, + * attempting to convert supported expressions (e.g., comparisons, logical AND/OR, LIKE, IN, IS NULL, + * and SUBSTR-based expressions) into corresponding KQL filter strings. Any part of the expression that + * cannot be translated is preserved as a "remaining expression" for potential fallback processing. + * Supported translations include: + * - Variable-to-literal comparisons (e.g., =, !=, <, >, <=, >=) + * - String pattern matches using LIKE + * - Membership checks using IN + * - NULL checks via IS NULL + * - Substring comparisons (e.g., SUBSTR(x, start, len) = "val") mapped to wildcard KQL queries + * - Dereferencing fields from row-typed variables + * - Logical operators AND, OR, and NOT + */ public class ClpFilterToKqlConverter implements RowExpressionVisitor { @@ -69,6 +85,68 @@ public ClpFilterToKqlConverter(StandardFunctionResolution standardFunctionResolu this.assignments = requireNonNull(assignments, "assignments is null"); } + @Override + public ClpExpression visitCall(CallExpression node, Void context) + { + FunctionHandle functionHandle = node.getFunctionHandle(); + if (standardFunctionResolution.isNotFunction(functionHandle)) { + return handleNot(node); + } + + if (standardFunctionResolution.isLikeFunction(functionHandle)) { + return handleLike(node); + } + + FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(node.getFunctionHandle()); + Optional operatorTypeOptional = functionMetadata.getOperatorType(); + if (operatorTypeOptional.isPresent()) { + OperatorType operatorType = operatorTypeOptional.get(); + if (operatorType.isComparisonOperator() && operatorType != OperatorType.IS_DISTINCT_FROM) { + return handleLogicalBinary(operatorType, node); + } + } + + return new ClpExpression(node); + } + + @Override + public ClpExpression visitConstant(ConstantExpression node, Void context) + { + return new 
ClpExpression(getLiteralString(node)); + } + + @Override + public ClpExpression visitVariableReference(VariableReferenceExpression node, Void context) + { + return new ClpExpression(getVariableName(node)); + } + + @Override + public ClpExpression visitSpecialForm(SpecialFormExpression node, Void context) + { + switch (node.getForm()) { + case AND: + return handleAnd(node); + case OR: + return handleOr(node); + case IN: + return handleIn(node); + case IS_NULL: + return handleIsNull(node); + case DEREFERENCE: + return handleDereference(node); + default: + return new ClpExpression(node); + } + } + + // For all other expressions, return the original expression + @Override + public ClpExpression visitExpression(RowExpression node, Void context) + { + return new ClpExpression(node); + } + private static String getLiteralString(ConstantExpression literal) { if (literal.getValue() instanceof Slice) { @@ -82,6 +160,12 @@ private String getVariableName(VariableReferenceExpression variable) return ((ClpColumnHandle) assignments.get(variable)).getOriginalColumnName(); } + /** + * Handles the logical NOT expression. + * Example: + * Input: NOT (col1 = 5) + * Output: NOT col1: 5 + */ private ClpExpression handleNot(CallExpression node) { if (node.getArguments().size() != 1) { @@ -97,6 +181,14 @@ private ClpExpression handleNot(CallExpression node) return new ClpExpression("NOT " + expression.getDefinition().get()); } + /** + * Handles the logical AND expression. + * Combines all definable child expressions into a single KQL query joined by AND. + * Any unsupported children are collected into remaining expressions. 
+ * Example: + * Input: col1 = 5 AND col2 = 'abc' + * Output: (col1: 5 AND col2: "abc") + */ private ClpExpression handleAnd(SpecialFormExpression node) { StringBuilder queryBuilder = new StringBuilder(); @@ -134,6 +226,14 @@ else if (!remainingExpressions.isEmpty()) { return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")"); } + /** + * Handles the logical OR expression. + * Combines all fully convertible child expressions into a single CLP query joined by OR. + * Returns the original node if any child is unsupported. + * Example: + * Input: col1 = 5 OR col1 = 10 + * Output: (col1: 5 OR col1: 10) + */ private ClpExpression handleOr(SpecialFormExpression node) { StringBuilder queryBuilder = new StringBuilder(); @@ -151,6 +251,12 @@ private ClpExpression handleOr(SpecialFormExpression node) return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); } + /** + * Handles the IN predicate. + * Example: + * Input: col1 IN (1, 2, 3) + * Output: (col1: 1 OR col1: 2 OR col1: 3) + */ private ClpExpression handleIn(SpecialFormExpression node) { ClpExpression variable = node.getArguments().get(0).accept(this, null); @@ -179,6 +285,12 @@ private ClpExpression handleIn(SpecialFormExpression node) return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); } + /** + * Handles the IS NULL predicate. + * Example: + * Input: col1 IS NULL + * Output: NOT col1: * + */ private ClpExpression handleIsNull(SpecialFormExpression node) { if (node.getArguments().size() != 1) { @@ -195,17 +307,24 @@ private ClpExpression handleIsNull(SpecialFormExpression node) return new ClpExpression(String.format("NOT %s: *", variableName)); } - private ClpExpression handleDeferenceImpl(RowExpression node) + /** + * Handles dereference expressions on RowTypes (e.g., col.row_field). + * Converts row dereferences into dot-separated field access. 
+ * Example: + * Input: address.city (from a RowType 'address') + * Output: address.city + */ + private ClpExpression handleDereference(RowExpression expression) { - if (node instanceof VariableReferenceExpression) { - return node.accept(this, null); + if (expression instanceof VariableReferenceExpression) { + return expression.accept(this, null); } - if (!(node instanceof SpecialFormExpression)) { - return new ClpExpression(node); + if (!(expression instanceof SpecialFormExpression)) { + return new ClpExpression(expression); } - SpecialFormExpression specialForm = (SpecialFormExpression) node; + SpecialFormExpression specialForm = (SpecialFormExpression) expression; List arguments = specialForm.getArguments(); if (arguments.size() != 2) { throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "DEREFERENCE expects 2 arguments"); @@ -239,18 +358,21 @@ private ClpExpression handleDeferenceImpl(RowExpression node) RowType.Field field = rowType.getFields().get(fieldIndex); String fieldName = field.getName().orElse("field" + fieldIndex); - ClpExpression baseString = handleDeferenceImpl(base); + ClpExpression baseString = handleDereference(base); if (!baseString.getDefinition().isPresent()) { - return new ClpExpression(node); + return new ClpExpression(expression); } return new ClpExpression(baseString.getDefinition().get() + "." + fieldName); } - private ClpExpression handleDereference(SpecialFormExpression expression) - { - return handleDeferenceImpl(expression); - } - + /** + * Handles LIKE expressions. + * Transforms SQL LIKE into KQL queries using wildcards (* and ?). + * Supports constant patterns or constant casts only. 
+ * Example: + * Input: col1 LIKE 'a_bc%' + * Output: col1: "a?bc*" + */ private ClpExpression handleLike(CallExpression node) { if (node.getArguments().size() != 2) { @@ -387,6 +509,15 @@ private Optional parseLengthLiteralOrFunction(RowExpression lengthExpre /** * Translate SUBSTR(x, start) or SUBSTR(x, start, length) = 'someString' to KQL. + * Examples: + * SUBSTR(message, 1, 3) = 'abc' + * → message: "abc*" + * SUBSTR(message, 4, 3) = 'abc' + * → message: "???abc*" + * SUBSTR(message, 2) = 'hello' + * → message: "?hello" + * SUBSTR(message, -5) = 'hello' + * → message: "*hello" */ private ClpExpression interpretSubstringEquality(SubstrInfo info, String targetString) { @@ -462,6 +593,10 @@ private ClpExpression tryInterpretSubstringEquality( * Builds a CLP expression from a basic comparison between a variable and a literal. * Handles different operator types (EQUAL, NOT_EQUAL, and logical binary ops like <, >, etc.) * and formats them appropriately based on whether the literal is a string or a non-string type. + * Examples: + * col = 'abc' → col: "abc" + * col != 42 → NOT col: 42 + * 5 < col → col > 5 */ private ClpExpression buildClpExpression( String variableName, @@ -492,6 +627,11 @@ else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceo return new ClpExpression(originalNode); } + /** + * Handles logical binary operators (e.g., =, !=, <, >) between two expressions. + * Supports constant on either side by flipping the operator when needed. + * Also checks for SUBSTR(x, ...) = 'value' patterns and delegates to substring handler. 
+ */ private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression node) { if (node.getArguments().size() != 2) { @@ -545,66 +685,4 @@ else if (leftIsConstant) { // fallback return new ClpExpression(node); } - - @Override - public ClpExpression visitCall(CallExpression node, Void context) - { - FunctionHandle functionHandle = node.getFunctionHandle(); - if (standardFunctionResolution.isNotFunction(functionHandle)) { - return handleNot(node); - } - - if (standardFunctionResolution.isLikeFunction(functionHandle)) { - return handleLike(node); - } - - FunctionMetadata functionMetadata = functionMetadataManager.getFunctionMetadata(node.getFunctionHandle()); - Optional operatorTypeOptional = functionMetadata.getOperatorType(); - if (operatorTypeOptional.isPresent()) { - OperatorType operatorType = operatorTypeOptional.get(); - if (operatorType.isComparisonOperator() && operatorType != OperatorType.IS_DISTINCT_FROM) { - return handleLogicalBinary(operatorType, node); - } - } - - return new ClpExpression(node); - } - - @Override - public ClpExpression visitConstant(ConstantExpression node, Void context) - { - return new ClpExpression(getLiteralString(node)); - } - - @Override - public ClpExpression visitVariableReference(VariableReferenceExpression node, Void context) - { - return new ClpExpression(getVariableName(node)); - } - - @Override - public ClpExpression visitSpecialForm(SpecialFormExpression node, Void context) - { - switch (node.getForm()) { - case AND: - return handleAnd(node); - case OR: - return handleOr(node); - case IN: - return handleIn(node); - case IS_NULL: - return handleIsNull(node); - case DEREFERENCE: - return handleDereference(node); - default: - return new ClpExpression(node); - } - } - - // For all other expressions, return the original expression - @Override - public ClpExpression visitExpression(RowExpression node, Void context) - { - return new ClpExpression(node); - } } diff --git 
a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java index 4f125e601bc03..7ed353c7a222c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java @@ -20,7 +20,13 @@ public interface ClpMetadataProvider { + /** + * Returns the list of column handles for the given table. + */ public List listColumnHandles(SchemaTableName schemaTableName); + /** + * Returns the list of table names in the given schema. + */ public List listTableNames(String schema); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java index 17cd6dc81d7f6..c3686be6f0e5f 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitProvider.java @@ -20,5 +20,8 @@ public interface ClpSplitProvider { + /** + * Returns a list of splits for the given table layout handle. 
+ */ List listSplits(ClpTableLayoutHandle clpTableLayoutHandle); } From f36b5b8d813e5fd08fef88ca3fe2d3b780f3226e Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 4 Apr 2025 01:05:25 +0000 Subject: [PATCH 126/126] remove the empty line --- .../com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java index beae6ca3819c8..e332534aadd7b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFilterToKqlConverter.java @@ -561,7 +561,6 @@ private ClpExpression interpretSubstringEquality(SubstrInfo info, String targetS return new ClpExpression(Optional.empty(), Optional.empty()); } - /** * Checks whether the given expression matches the pattern SUBSTR(x, ...) = 'someString', * and if so, attempts to convert it into a KQL query using wildcards and construct a CLP expression.