diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index d9f04a627bc5..6e9d76a61e05 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -366,6 +366,7 @@ set(ARROW_SRCS extension_type.cc extension/bool8.cc extension/json.cc + extension/parquet_variant.cc extension/uuid.cc pretty_print.cc record_batch.cc diff --git a/cpp/src/parquet/arrow/variant_internal.cc b/cpp/src/arrow/extension/parquet_variant.cc similarity index 84% rename from cpp/src/parquet/arrow/variant_internal.cc rename to cpp/src/arrow/extension/parquet_variant.cc index 87f88efaac75..95aa5a0eb68e 100644 --- a/cpp/src/parquet/arrow/variant_internal.cc +++ b/cpp/src/arrow/extension/parquet_variant.cc @@ -15,28 +15,19 @@ // specific language governing permissions and limitations // under the License. -#include "parquet/arrow/variant_internal.h" +#include "arrow/extension/parquet_variant.h" #include #include "arrow/extension_type.h" #include "arrow/result.h" #include "arrow/status.h" -#include "arrow/type_fwd.h" #include "arrow/util/logging_internal.h" -namespace parquet::arrow { +namespace arrow::extension { -using ::arrow::Array; -using ::arrow::ArrayData; -using ::arrow::DataType; -using ::arrow::ExtensionType; -using ::arrow::Result; -using ::arrow::Type; - -VariantExtensionType::VariantExtensionType( - const std::shared_ptr<::arrow::DataType>& storage_type) - : ::arrow::ExtensionType(storage_type) { +VariantExtensionType::VariantExtensionType(const std::shared_ptr& storage_type) + : ExtensionType(storage_type) { // GH-45948: Shredded variants will need to handle an optional shredded_value as // well as value_ becoming optional. @@ -66,14 +57,13 @@ std::string VariantExtensionType::Serialize() const { return ""; } std::shared_ptr VariantExtensionType::MakeArray( std::shared_ptr data) const { DCHECK_EQ(data->type->id(), Type::EXTENSION); - DCHECK_EQ("parquet.variant", - ::arrow::internal::checked_cast(*data->type) - .extension_name()); + DCHECK_EQ("arrow.parquet.variant", + internal::checked_cast(*data->type).extension_name()); return std::make_shared(data); } namespace { -bool IsBinaryField(const std::shared_ptr<::arrow::Field> field) { +bool IsBinaryField(const std::shared_ptr field) { return field->type()->storage_id() == Type::BINARY || field->type()->storage_id() == Type::LARGE_BINARY; } @@ -116,8 +106,8 @@ bool VariantExtensionType::IsSupportedStorageType( Result> VariantExtensionType::Make( std::shared_ptr storage_type) { if (!IsSupportedStorageType(storage_type)) { - return ::arrow::Status::Invalid("Invalid storage type for VariantExtensionType: ", - storage_type->ToString()); + return Status::Invalid("Invalid storage type for VariantExtensionType: ", + storage_type->ToString()); } return std::make_shared(std::move(storage_type)); @@ -130,4 +120,4 @@ std::shared_ptr variant(std::shared_ptr storage_type) { return VariantExtensionType::Make(std::move(storage_type)).ValueOrDie(); } -} // namespace parquet::arrow +} // namespace arrow::extension diff --git a/cpp/src/parquet/arrow/variant_internal.h b/cpp/src/arrow/extension/parquet_variant.h similarity index 56% rename from cpp/src/parquet/arrow/variant_internal.h rename to cpp/src/arrow/extension/parquet_variant.h index d0b77c72c619..be90923f14e6 100644 --- a/cpp/src/parquet/arrow/variant_internal.h +++ b/cpp/src/arrow/extension/parquet_variant.h @@ -17,17 +17,16 @@ #pragma once -#include #include #include "arrow/extension_type.h" -#include "parquet/platform.h" +#include "arrow/util/visibility.h" -namespace parquet::arrow { +namespace arrow::extension { -class PARQUET_EXPORT VariantArray : public ::arrow::ExtensionArray { +class ARROW_EXPORT VariantArray : public ExtensionArray { public: - using ::arrow::ExtensionArray::ExtensionArray; + using ExtensionArray::ExtensionArray; }; /// EXPERIMENTAL: Variant is not yet fully supported. @@ -46,41 +45,37 @@ class PARQUET_EXPORT VariantArray : public ::arrow::ExtensionArray { /// /// To read more about variant shredding, see the variant shredding spec at /// https://github.com/apache/parquet-format/blob/master/VariantShredding.md -class PARQUET_EXPORT VariantExtensionType : public ::arrow::ExtensionType { +class ARROW_EXPORT VariantExtensionType : public ExtensionType { public: - explicit VariantExtensionType(const std::shared_ptr<::arrow::DataType>& storage_type); + explicit VariantExtensionType(const std::shared_ptr& storage_type); - std::string extension_name() const override { return "parquet.variant"; } + std::string extension_name() const override { return "arrow.parquet.variant"; } - bool ExtensionEquals(const ::arrow::ExtensionType& other) const override; + bool ExtensionEquals(const ExtensionType& other) const override; - ::arrow::Result> Deserialize( - std::shared_ptr<::arrow::DataType> storage_type, + Result> Deserialize( + std::shared_ptr storage_type, const std::string& serialized_data) const override; std::string Serialize() const override; - std::shared_ptr<::arrow::Array> MakeArray( - std::shared_ptr<::arrow::ArrayData> data) const override; + std::shared_ptr MakeArray(std::shared_ptr data) const override; - static ::arrow::Result> Make( - std::shared_ptr<::arrow::DataType> storage_type); + static Result> Make(std::shared_ptr storage_type); - static bool IsSupportedStorageType( - const std::shared_ptr<::arrow::DataType>& storage_type); + static bool IsSupportedStorageType(const std::shared_ptr& storage_type); - std::shared_ptr<::arrow::Field> metadata() const { return metadata_; } + std::shared_ptr metadata() const { return metadata_; } - std::shared_ptr<::arrow::Field> value() const { return value_; } + std::shared_ptr value() const { return value_; } private: // TODO GH-45948 added shredded_value - std::shared_ptr<::arrow::Field> metadata_; - std::shared_ptr<::arrow::Field> value_; + std::shared_ptr metadata_; + std::shared_ptr value_; }; /// \brief Return a VariantExtensionType instance. -PARQUET_EXPORT std::shared_ptr<::arrow::DataType> variant( - std::shared_ptr<::arrow::DataType> storage_type); +ARROW_EXPORT std::shared_ptr variant(std::shared_ptr storage_type); -} // namespace parquet::arrow +} // namespace arrow::extension diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index feeb1805f639..6c1550dcc2f7 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -157,7 +157,6 @@ set(PARQUET_SRCS arrow/reader_internal.cc arrow/schema.cc arrow/schema_internal.cc - arrow/variant_internal.cc arrow/writer.cc bloom_filter.cc bloom_filter_reader.cc diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 73ce8ea69e3c..f930d3d7bdf7 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -25,7 +25,6 @@ #include "parquet/arrow/reader.h" #include "parquet/arrow/reader_internal.h" #include "parquet/arrow/schema.h" -#include "parquet/arrow/variant_internal.h" #include "parquet/file_reader.h" #include "parquet/schema.h" #include "parquet/schema_internal.h" @@ -34,6 +33,7 @@ #include "arrow/array.h" #include "arrow/extension/json.h" +#include "arrow/extension/parquet_variant.h" #include "arrow/extension/uuid.h" #include "arrow/ipc/writer.h" #include "arrow/testing/extension_type.h" @@ -950,7 +950,7 @@ TEST_F(TestConvertParquetSchema, ParquetVariant) { auto arrow_metadata = ::arrow::field("metadata", ::arrow::binary(), /*nullable=*/false); auto arrow_value = ::arrow::field("value", ::arrow::binary(), /*nullable=*/false); auto arrow_variant = ::arrow::struct_({arrow_metadata, arrow_value}); - auto variant_extension = std::make_shared(arrow_variant); + auto variant_extension = ::arrow::extension::variant(arrow_variant); { // Parquet file does not contain Arrow schema. diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 266215a8104e..9c0db1d5335f 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -22,6 +22,7 @@ #include #include "arrow/extension/json.h" +#include "arrow/extension/parquet_variant.h" #include "arrow/extension/uuid.h" #include "arrow/extension_type.h" #include "arrow/io/memory.h" @@ -36,7 +37,6 @@ #include "arrow/util/value_parsing.h" #include "parquet/arrow/schema_internal.h" -#include "parquet/arrow/variant_internal.h" #include "parquet/exception.h" #include "parquet/geospatial/util_json_internal.h" #include "parquet/metadata.h" @@ -129,10 +129,11 @@ Status MapToNode(const std::shared_ptr<::arrow::MapType>& type, const std::strin return Status::OK(); } -Status VariantToNode(const std::shared_ptr& type, - const std::string& name, bool nullable, int field_id, - const WriterProperties& properties, - const ArrowWriterProperties& arrow_properties, NodePtr* out) { +Status VariantToNode( + const std::shared_ptr<::arrow::extension::VariantExtensionType>& type, + const std::string& name, bool nullable, int field_id, + const WriterProperties& properties, const ArrowWriterProperties& arrow_properties, + NodePtr* out) { NodePtr metadata_node; RETURN_NOT_OK(FieldToNode("metadata", type->metadata(), properties, arrow_properties, &metadata_node)); @@ -485,8 +486,10 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, ARROW_ASSIGN_OR_RAISE(logical_type, LogicalTypeFromGeoArrowMetadata(ext_type->Serialize())); break; - } else if (ext_type->extension_name() == std::string("parquet.variant")) { - auto variant_type = std::static_pointer_cast(field->type()); + } else if (ext_type->extension_name() == std::string("arrow.parquet.variant")) { + auto variant_type = + std::static_pointer_cast<::arrow::extension::VariantExtensionType>( + field->type()); return VariantToNode(variant_type, name, field->nullable(), field_id, properties, arrow_properties, out); @@ -597,7 +600,7 @@ Status GroupToStruct(const GroupNode& node, LevelInfo current_levels, auto struct_type = ::arrow::struct_(arrow_fields); if (ctx->properties.get_arrow_extensions_enabled() && node.logical_type()->is_variant()) { - auto extension_type = ::arrow::GetExtensionType("parquet.variant"); + auto extension_type = ::arrow::GetExtensionType("arrow.parquet.variant"); if (extension_type) { ARROW_ASSIGN_OR_RAISE( struct_type, @@ -1147,10 +1150,10 @@ Result ApplyOriginalMetadata(const Field& origin_field, SchemaField* infer extension_supports_inferred_storage = arrow_extension_inferred || ::arrow::extension::UuidType::IsSupportedStorageType(inferred_type); - } else if (origin_extension_name == "parquet.variant") { + } else if (origin_extension_name == "arrow.parquet.variant") { extension_supports_inferred_storage = arrow_extension_inferred || - VariantExtensionType::IsSupportedStorageType(inferred_type); + ::arrow::extension::VariantExtensionType::IsSupportedStorageType(inferred_type); } else { extension_supports_inferred_storage = origin_extension_type.storage_type()->Equals(*inferred_type); diff --git a/cpp/src/parquet/arrow/variant_test.cc b/cpp/src/parquet/arrow/variant_test.cc index caf63d8e3d72..04f46d2e444d 100644 --- a/cpp/src/parquet/arrow/variant_test.cc +++ b/cpp/src/parquet/arrow/variant_test.cc @@ -15,9 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include "parquet/arrow/variant_internal.h" - #include "arrow/array/validate.h" +#include "arrow/extension/parquet_variant.h" #include "arrow/ipc/test_common.h" #include "arrow/record_batch.h" #include "arrow/testing/gtest_util.h" @@ -29,16 +28,20 @@ using ::arrow::binary; using ::arrow::struct_; TEST(TestVariantExtensionType, StorageTypeValidation) { - auto variant1 = variant(struct_({field("metadata", binary(), /*nullable=*/false), - field("value", binary(), /*nullable=*/false)})); - auto variant2 = variant(struct_({field("metadata", binary(), /*nullable=*/false), - field("value", binary(), /*nullable=*/false)})); + auto variant1 = ::arrow::extension::variant( + struct_({field("metadata", binary(), /*nullable=*/false), + field("value", binary(), /*nullable=*/false)})); + auto variant2 = ::arrow::extension::variant( + struct_({field("metadata", binary(), /*nullable=*/false), + field("value", binary(), /*nullable=*/false)})); ASSERT_TRUE(variant1->Equals(variant2)); // Metadata and value fields can be provided in either order - auto variantFieldsFlipped = std::dynamic_pointer_cast( - variant(struct_({field("value", binary(), /*nullable=*/false), + auto variantFieldsFlipped = + std::dynamic_pointer_cast<::arrow::extension::VariantExtensionType>( + ::arrow::extension::variant( + struct_({field("value", binary(), /*nullable=*/false), field("metadata", binary(), /*nullable=*/false)}))); ASSERT_EQ("metadata", variantFieldsFlipped->metadata()->name()); @@ -62,7 +65,7 @@ TEST(TestVariantExtensionType, StorageTypeValidation) { Invalid, "Invalid: Invalid storage type for VariantExtensionType: " + storage_type->ToString(), - VariantExtensionType::Make(storage_type)); + ::arrow::extension::VariantExtensionType::Make(storage_type)); } }