From 92cd842db4594b44f2fe48edd26164c6ea108b6a Mon Sep 17 00:00:00 2001 From: Zehua Zou Date: Fri, 30 Jan 2026 18:31:06 +0800 Subject: [PATCH 1/3] correct variant's extension name --- cpp/src/parquet/arrow/schema.cc | 6 +++--- cpp/src/parquet/arrow/variant_internal.cc | 2 +- cpp/src/parquet/arrow/variant_internal.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 266215a8104..37fa56656ae 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -485,7 +485,7 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, ARROW_ASSIGN_OR_RAISE(logical_type, LogicalTypeFromGeoArrowMetadata(ext_type->Serialize())); break; - } else if (ext_type->extension_name() == std::string("parquet.variant")) { + } else if (ext_type->extension_name() == std::string("arrow.parquet.variant")) { auto variant_type = std::static_pointer_cast(field->type()); return VariantToNode(variant_type, name, field->nullable(), field_id, properties, @@ -597,7 +597,7 @@ Status GroupToStruct(const GroupNode& node, LevelInfo current_levels, auto struct_type = ::arrow::struct_(arrow_fields); if (ctx->properties.get_arrow_extensions_enabled() && node.logical_type()->is_variant()) { - auto extension_type = ::arrow::GetExtensionType("parquet.variant"); + auto extension_type = ::arrow::GetExtensionType("arrow.parquet.variant"); if (extension_type) { ARROW_ASSIGN_OR_RAISE( struct_type, @@ -1147,7 +1147,7 @@ Result ApplyOriginalMetadata(const Field& origin_field, SchemaField* infer extension_supports_inferred_storage = arrow_extension_inferred || ::arrow::extension::UuidType::IsSupportedStorageType(inferred_type); - } else if (origin_extension_name == "parquet.variant") { + } else if (origin_extension_name == "arrow.parquet.variant") { extension_supports_inferred_storage = arrow_extension_inferred || VariantExtensionType::IsSupportedStorageType(inferred_type); diff --git a/cpp/src/parquet/arrow/variant_internal.cc b/cpp/src/parquet/arrow/variant_internal.cc index 87f88efaac7..1a450d493f1 100644 --- a/cpp/src/parquet/arrow/variant_internal.cc +++ b/cpp/src/parquet/arrow/variant_internal.cc @@ -66,7 +66,7 @@ std::string VariantExtensionType::Serialize() const { return ""; } std::shared_ptr VariantExtensionType::MakeArray( std::shared_ptr data) const { DCHECK_EQ(data->type->id(), Type::EXTENSION); - DCHECK_EQ("parquet.variant", + DCHECK_EQ("arrow.parquet.variant", ::arrow::internal::checked_cast(*data->type) .extension_name()); return std::make_shared(data); diff --git a/cpp/src/parquet/arrow/variant_internal.h b/cpp/src/parquet/arrow/variant_internal.h index d0b77c72c61..278725d52c1 100644 --- a/cpp/src/parquet/arrow/variant_internal.h +++ b/cpp/src/parquet/arrow/variant_internal.h @@ -50,7 +50,7 @@ class PARQUET_EXPORT VariantExtensionType : public ::arrow::ExtensionType { public: explicit VariantExtensionType(const std::shared_ptr<::arrow::DataType>& storage_type); - std::string extension_name() const override { return "parquet.variant"; } + std::string extension_name() const override { return "arrow.parquet.variant"; } bool ExtensionEquals(const ::arrow::ExtensionType& other) const override; From 1794f6f8277c4a32084dc2daa41efd201d349520 Mon Sep 17 00:00:00 2001 From: Zehua Zou Date: Wed, 4 Feb 2026 11:19:23 +0800 Subject: [PATCH 2/3] move variant_internal.cc to arrow/extension/parquet_variant.cc --- cpp/src/arrow/CMakeLists.txt | 1 + .../extension/parquet_variant.cc} | 6 +++--- .../extension/parquet_variant.h} | 5 ++--- cpp/src/parquet/CMakeLists.txt | 1 - cpp/src/parquet/arrow/arrow_schema_test.cc | 4 ++-- cpp/src/parquet/arrow/schema.cc | 17 ++++++++------- cpp/src/parquet/arrow/variant_test.cc | 21 +++++++++++-------- 7 files changed, 30 insertions(+), 25 deletions(-) rename cpp/src/{parquet/arrow/variant_internal.cc => arrow/extension/parquet_variant.cc} (97%) rename cpp/src/{parquet/arrow/variant_internal.h => arrow/extension/parquet_variant.h} (97%) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index d9f04a627bc..6e9d76a61e0 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -366,6 +366,7 @@ set(ARROW_SRCS extension_type.cc extension/bool8.cc extension/json.cc + extension/parquet_variant.cc extension/uuid.cc pretty_print.cc record_batch.cc diff --git a/cpp/src/parquet/arrow/variant_internal.cc b/cpp/src/arrow/extension/parquet_variant.cc similarity index 97% rename from cpp/src/parquet/arrow/variant_internal.cc rename to cpp/src/arrow/extension/parquet_variant.cc index 1a450d493f1..93fabedf7c7 100644 --- a/cpp/src/parquet/arrow/variant_internal.cc +++ b/cpp/src/arrow/extension/parquet_variant.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "parquet/arrow/variant_internal.h" +#include "arrow/extension/parquet_variant.h" #include @@ -25,7 +25,7 @@ #include "arrow/type_fwd.h" #include "arrow/util/logging_internal.h" -namespace parquet::arrow { +namespace arrow::extension { using ::arrow::Array; using ::arrow::ArrayData; @@ -130,4 +130,4 @@ std::shared_ptr variant(std::shared_ptr storage_type) { return VariantExtensionType::Make(std::move(storage_type)).ValueOrDie(); } -} // namespace parquet::arrow +} // namespace arrow::extension diff --git a/cpp/src/parquet/arrow/variant_internal.h b/cpp/src/arrow/extension/parquet_variant.h similarity index 97% rename from cpp/src/parquet/arrow/variant_internal.h rename to cpp/src/arrow/extension/parquet_variant.h index 278725d52c1..e91ab80b339 100644 --- a/cpp/src/parquet/arrow/variant_internal.h +++ b/cpp/src/arrow/extension/parquet_variant.h @@ -17,13 +17,12 @@ #pragma once -#include #include #include "arrow/extension_type.h" #include "parquet/platform.h" -namespace parquet::arrow { +namespace arrow::extension { class PARQUET_EXPORT VariantArray : public ::arrow::ExtensionArray { public: @@ -83,4 +82,4 @@ class PARQUET_EXPORT VariantExtensionType : public ::arrow::ExtensionType { PARQUET_EXPORT std::shared_ptr<::arrow::DataType> variant( std::shared_ptr<::arrow::DataType> storage_type); -} // namespace parquet::arrow +} // namespace arrow::extension diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index feeb1805f63..6c1550dcc2f 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -157,7 +157,6 @@ set(PARQUET_SRCS arrow/reader_internal.cc arrow/schema.cc arrow/schema_internal.cc - arrow/variant_internal.cc arrow/writer.cc bloom_filter.cc bloom_filter_reader.cc diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 73ce8ea69e3..f930d3d7bdf 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -25,7 +25,6 @@ #include "parquet/arrow/reader.h" #include "parquet/arrow/reader_internal.h" #include "parquet/arrow/schema.h" -#include "parquet/arrow/variant_internal.h" #include "parquet/file_reader.h" #include "parquet/schema.h" #include "parquet/schema_internal.h" @@ -34,6 +33,7 @@ #include "arrow/array.h" #include "arrow/extension/json.h" +#include "arrow/extension/parquet_variant.h" #include "arrow/extension/uuid.h" #include "arrow/ipc/writer.h" #include "arrow/testing/extension_type.h" @@ -950,7 +950,7 @@ TEST_F(TestConvertParquetSchema, ParquetVariant) { auto arrow_metadata = ::arrow::field("metadata", ::arrow::binary(), /*nullable=*/false); auto arrow_value = ::arrow::field("value", ::arrow::binary(), /*nullable=*/false); auto arrow_variant = ::arrow::struct_({arrow_metadata, arrow_value}); - auto variant_extension = std::make_shared(arrow_variant); + auto variant_extension = ::arrow::extension::variant(arrow_variant); { // Parquet file does not contain Arrow schema. diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 37fa56656ae..9c0db1d5335 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -22,6 +22,7 @@ #include #include "arrow/extension/json.h" +#include "arrow/extension/parquet_variant.h" #include "arrow/extension/uuid.h" #include "arrow/extension_type.h" #include "arrow/io/memory.h" @@ -36,7 +37,6 @@ #include "arrow/util/value_parsing.h" #include "parquet/arrow/schema_internal.h" -#include "parquet/arrow/variant_internal.h" #include "parquet/exception.h" #include "parquet/geospatial/util_json_internal.h" #include "parquet/metadata.h" @@ -129,10 +129,11 @@ Status MapToNode(const std::shared_ptr<::arrow::MapType>& type, const std::strin return Status::OK(); } -Status VariantToNode(const std::shared_ptr& type, - const std::string& name, bool nullable, int field_id, - const WriterProperties& properties, - const ArrowWriterProperties& arrow_properties, NodePtr* out) { +Status VariantToNode( + const std::shared_ptr<::arrow::extension::VariantExtensionType>& type, + const std::string& name, bool nullable, int field_id, + const WriterProperties& properties, const ArrowWriterProperties& arrow_properties, + NodePtr* out) { NodePtr metadata_node; RETURN_NOT_OK(FieldToNode("metadata", type->metadata(), properties, arrow_properties, &metadata_node)); @@ -486,7 +487,9 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, LogicalTypeFromGeoArrowMetadata(ext_type->Serialize())); break; } else if (ext_type->extension_name() == std::string("arrow.parquet.variant")) { - auto variant_type = std::static_pointer_cast(field->type()); + auto variant_type = + std::static_pointer_cast<::arrow::extension::VariantExtensionType>( + field->type()); return VariantToNode(variant_type, name, field->nullable(), field_id, properties, arrow_properties, out); @@ -1150,7 +1153,7 @@ Result ApplyOriginalMetadata(const Field& origin_field, SchemaField* infer } else if (origin_extension_name == "arrow.parquet.variant") { extension_supports_inferred_storage = arrow_extension_inferred || - VariantExtensionType::IsSupportedStorageType(inferred_type); + ::arrow::extension::VariantExtensionType::IsSupportedStorageType(inferred_type); } else { extension_supports_inferred_storage = origin_extension_type.storage_type()->Equals(*inferred_type); diff --git a/cpp/src/parquet/arrow/variant_test.cc b/cpp/src/parquet/arrow/variant_test.cc index caf63d8e3d7..04f46d2e444 100644 --- a/cpp/src/parquet/arrow/variant_test.cc +++ b/cpp/src/parquet/arrow/variant_test.cc @@ -15,9 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include "parquet/arrow/variant_internal.h" - #include "arrow/array/validate.h" +#include "arrow/extension/parquet_variant.h" #include "arrow/ipc/test_common.h" #include "arrow/record_batch.h" #include "arrow/testing/gtest_util.h" @@ -29,16 +28,20 @@ using ::arrow::binary; using ::arrow::struct_; TEST(TestVariantExtensionType, StorageTypeValidation) { - auto variant1 = variant(struct_({field("metadata", binary(), /*nullable=*/false), - field("value", binary(), /*nullable=*/false)})); - auto variant2 = variant(struct_({field("metadata", binary(), /*nullable=*/false), - field("value", binary(), /*nullable=*/false)})); + auto variant1 = ::arrow::extension::variant( + struct_({field("metadata", binary(), /*nullable=*/false), + field("value", binary(), /*nullable=*/false)})); + auto variant2 = ::arrow::extension::variant( + struct_({field("metadata", binary(), /*nullable=*/false), + field("value", binary(), /*nullable=*/false)})); ASSERT_TRUE(variant1->Equals(variant2)); // Metadata and value fields can be provided in either order - auto variantFieldsFlipped = std::dynamic_pointer_cast( - variant(struct_({field("value", binary(), /*nullable=*/false), + auto variantFieldsFlipped = + std::dynamic_pointer_cast<::arrow::extension::VariantExtensionType>( + ::arrow::extension::variant( + struct_({field("value", binary(), /*nullable=*/false), field("metadata", binary(), /*nullable=*/false)}))); ASSERT_EQ("metadata", variantFieldsFlipped->metadata()->name()); @@ -62,7 +65,7 @@ TEST(TestVariantExtensionType, StorageTypeValidation) { Invalid, "Invalid: Invalid storage type for VariantExtensionType: " + storage_type->ToString(), - VariantExtensionType::Make(storage_type)); + ::arrow::extension::VariantExtensionType::Make(storage_type)); } } From 930444b275a0969c9003a5177a80c714a3220768 Mon Sep 17 00:00:00 2001 From: Zehua Zou Date: Wed, 4 Feb 2026 15:08:01 +0800 Subject: [PATCH 3/3] fix windows test compile --- cpp/src/arrow/extension/parquet_variant.cc | 22 ++++--------- cpp/src/arrow/extension/parquet_variant.h | 36 ++++++++++------------ 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/cpp/src/arrow/extension/parquet_variant.cc b/cpp/src/arrow/extension/parquet_variant.cc index 93fabedf7c7..95aa5a0eb68 100644 --- a/cpp/src/arrow/extension/parquet_variant.cc +++ b/cpp/src/arrow/extension/parquet_variant.cc @@ -22,21 +22,12 @@ #include "arrow/extension_type.h" #include "arrow/result.h" #include "arrow/status.h" -#include "arrow/type_fwd.h" #include "arrow/util/logging_internal.h" namespace arrow::extension { -using ::arrow::Array; -using ::arrow::ArrayData; -using ::arrow::DataType; -using ::arrow::ExtensionType; -using ::arrow::Result; -using ::arrow::Type; - -VariantExtensionType::VariantExtensionType( - const std::shared_ptr<::arrow::DataType>& storage_type) - : ::arrow::ExtensionType(storage_type) { +VariantExtensionType::VariantExtensionType(const std::shared_ptr& storage_type) + : ExtensionType(storage_type) { // GH-45948: Shredded variants will need to handle an optional shredded_value as // well as value_ becoming optional. @@ -67,13 +58,12 @@ std::shared_ptr VariantExtensionType::MakeArray( std::shared_ptr data) const { DCHECK_EQ(data->type->id(), Type::EXTENSION); DCHECK_EQ("arrow.parquet.variant", - ::arrow::internal::checked_cast(*data->type) - .extension_name()); + internal::checked_cast(*data->type).extension_name()); return std::make_shared(data); } namespace { -bool IsBinaryField(const std::shared_ptr<::arrow::Field> field) { +bool IsBinaryField(const std::shared_ptr field) { return field->type()->storage_id() == Type::BINARY || field->type()->storage_id() == Type::LARGE_BINARY; } @@ -116,8 +106,8 @@ bool VariantExtensionType::IsSupportedStorageType( Result> VariantExtensionType::Make( std::shared_ptr storage_type) { if (!IsSupportedStorageType(storage_type)) { - return ::arrow::Status::Invalid("Invalid storage type for VariantExtensionType: ", - storage_type->ToString()); + return Status::Invalid("Invalid storage type for VariantExtensionType: ", + storage_type->ToString()); } return std::make_shared(std::move(storage_type)); diff --git a/cpp/src/arrow/extension/parquet_variant.h b/cpp/src/arrow/extension/parquet_variant.h index e91ab80b339..be90923f14e 100644 --- a/cpp/src/arrow/extension/parquet_variant.h +++ b/cpp/src/arrow/extension/parquet_variant.h @@ -20,13 +20,13 @@ #include #include "arrow/extension_type.h" -#include "parquet/platform.h" +#include "arrow/util/visibility.h" namespace arrow::extension { -class PARQUET_EXPORT VariantArray : public ::arrow::ExtensionArray { +class ARROW_EXPORT VariantArray : public ExtensionArray { public: - using ::arrow::ExtensionArray::ExtensionArray; + using ExtensionArray::ExtensionArray; }; /// EXPERIMENTAL: Variant is not yet fully supported. @@ -45,41 +45,37 @@ class PARQUET_EXPORT VariantArray : public ::arrow::ExtensionArray { /// /// To read more about variant shredding, see the variant shredding spec at /// https://github.com/apache/parquet-format/blob/master/VariantShredding.md -class PARQUET_EXPORT VariantExtensionType : public ::arrow::ExtensionType { +class ARROW_EXPORT VariantExtensionType : public ExtensionType { public: - explicit VariantExtensionType(const std::shared_ptr<::arrow::DataType>& storage_type); + explicit VariantExtensionType(const std::shared_ptr& storage_type); std::string extension_name() const override { return "arrow.parquet.variant"; } - bool ExtensionEquals(const ::arrow::ExtensionType& other) const override; + bool ExtensionEquals(const ExtensionType& other) const override; - ::arrow::Result> Deserialize( - std::shared_ptr<::arrow::DataType> storage_type, + Result> Deserialize( + std::shared_ptr storage_type, const std::string& serialized_data) const override; std::string Serialize() const override; - std::shared_ptr<::arrow::Array> MakeArray( - std::shared_ptr<::arrow::ArrayData> data) const override; + std::shared_ptr MakeArray(std::shared_ptr data) const override; - static ::arrow::Result> Make( - std::shared_ptr<::arrow::DataType> storage_type); + static Result> Make(std::shared_ptr storage_type); - static bool IsSupportedStorageType( - const std::shared_ptr<::arrow::DataType>& storage_type); + static bool IsSupportedStorageType(const std::shared_ptr& storage_type); - std::shared_ptr<::arrow::Field> metadata() const { return metadata_; } + std::shared_ptr metadata() const { return metadata_; } - std::shared_ptr<::arrow::Field> value() const { return value_; } + std::shared_ptr value() const { return value_; } private: // TODO GH-45948 added shredded_value - std::shared_ptr<::arrow::Field> metadata_; - std::shared_ptr<::arrow::Field> value_; + std::shared_ptr metadata_; + std::shared_ptr value_; }; /// \brief Return a VariantExtensionType instance. -PARQUET_EXPORT std::shared_ptr<::arrow::DataType> variant( - std::shared_ptr<::arrow::DataType> storage_type); +ARROW_EXPORT std::shared_ptr variant(std::shared_ptr storage_type); } // namespace arrow::extension