diff --git a/third_party/overlays/clickhouse/.skip-tree b/third_party/overlays/clickhouse/.skip-tree
new file mode 100644
index 000000000..2b0975566
--- /dev/null
+++ b/third_party/overlays/clickhouse/.skip-tree
@@ -0,0 +1 @@
+this needs to be callPackage'd
diff --git a/third_party/overlays/clickhouse/default.nix b/third_party/overlays/clickhouse/default.nix
new file mode 100644
index 000000000..978a115f5
--- /dev/null
+++ b/third_party/overlays/clickhouse/default.nix
@@ -0,0 +1,203 @@
+{ lib
+, llvmPackages
+, fetchFromGitHub
+, fetchpatch
+, cmake
+, ninja
+, python3
+, perl
+, nasm
+, yasm
+, nixosTests
+, darwin
+, findutils
+
+ # currently for BLAKE3 hash function
+, rustSupport ? true
+
+, corrosion
+, rustc
+, cargo
+, rustPlatform
+}:
+
+let
+  inherit (llvmPackages) stdenv;
+  # Pick the stdenv that supplies mkDerivation: on Darwin the LLVM 16 stdenv of
+  # the 11.0 Apple SDK package set, otherwise the stdenv of the llvmPackages argument.
+  mkDerivation =
+    let darwinStdenv = darwin.apple_sdk_11_0.llvmPackages_16.stdenv;
+    in (if !stdenv.isDarwin then stdenv else darwinStdenv).mkDerivation;
+in
+mkDerivation rec {
+ pname = "clickhouse";
+ version = "23.10.3.5";
+
+ src = fetchFromGitHub rec { # rec so that name can refer to rev
+ owner = "ClickHouse";
+ repo = "ClickHouse";
+ rev = "v${version}-stable";
+ fetchSubmodules = true;
+ name = "clickhouse-${rev}.tar.gz"; # postFetch below replaces $out with a gzip tarball
+ hash = "sha256-H3nIhBydLBxSesGrvqmwHmBoQGCGQlWgVVUudKLLkIY=";
+ postFetch = ''
+ # delete test and benchmark trees that make the fetched source too big
+ rm -rf $out/contrib/llvm-project/llvm/test
+ rm -rf $out/contrib/llvm-project/clang/test
+ rm -rf $out/contrib/croaring/benchmarks
+
+ # remove paths that cause trouble on case-insensitive filesystems (macOS): https://github.com/NixOS/nixpkgs/issues/39308
+ rm -rf $out/contrib/sysroot/linux-*
+ rm -rf $out/contrib/liburing/man
+
+ # pack the tree into a gzip tarball so the output does not exceed the 2GB limit;
+ # sorted names and fixed mtime/owner/mode aim for a deterministic archive, and every path gets a source/ prefix
+ tar -I 'gzip -n' \
+ --sort=name \
+ --mtime=1970-01-01 \
+ --owner=0 --group=0 \
+ --numeric-owner --mode=go=rX,u+rw,a-s \
+ --transform='s@^@source/@S' \
+ -cf temp -C "$out" .
+ rm -r "$out" # drop the unpacked tree ...
+ mv temp "$out" # ... and put the tarball in its place
+ '';
+ };
+
+ strictDeps = true;
+ nativeBuildInputs = [ # build-time tools used on every platform
+ cmake
+ ninja
+ python3
+ perl
+ llvmPackages.lld
+ ] ++ lib.optionals stdenv.isx86_64 [ # assemblers, added on x86_64 only
+ nasm
+ yasm
+ ] ++ lib.optionals stdenv.isDarwin [ # extra tools, added on Darwin only
+ llvmPackages.bintools
+ findutils
+ darwin.bootstrap_cmds
+ ] ++ lib.optionals rustSupport [ # Rust toolchain and cargo setup hook, only when rustSupport is enabled
+ rustc
+ cargo
+ rustPlatform.cargoSetupHook
+ ];
+
+  # The corrosion vendored by ClickHouse is too old and lacks
+  # https://github.com/corrosion-rs/corrosion/pull/205, so a newer release is fetched.
+  corrosionSrc =
+    if !rustSupport then null
+    else fetchFromGitHub {
+      owner = "corrosion-rs";
+      repo = "corrosion";
+      rev = "v0.3.5";
+      hash = "sha256-r/jrck4RiQynH1+Hx4GyIHpw/Kkr8dHe1+vTHg+fdRs=";
+    };
+  # Cargo dependency tarball for corrosion, fetched from its generator/ directory.
+  corrosionDeps =
+    if !rustSupport then null
+    else rustPlatform.fetchCargoTarball {
+      name = "corrosion-deps";
+      src = corrosionSrc;
+      preBuild = "cd generator";
+      hash = "sha256-dhUgpwSjE9NZ2mCkhGiydI51LIOClA5wwk1O3mnnbM8=";
+    };
+  # Cargo dependency tarball for the rust/ directory of the ClickHouse source.
+  rustDeps =
+    if !rustSupport then null
+    else rustPlatform.fetchCargoTarball {
+      name = "rust-deps";
+      inherit src;
+      preBuild = "cd rust";
+      hash = "sha256-fWDAGm19b7uZv8aBdBoieY5c6POd8IxFXbGdtONpZbw=";
+    };
+
+ dontCargoSetupPostUnpack = true; # NOTE(review): presumably stops cargoSetupHook from running its post-unpack step on its own; it is called by hand below - confirm
+ postUnpack = lib.optionalString rustSupport ''
+ pushd source # root of the unpacked tree (the src tarball prefixes every path with source/)
+
+ rm -rf contrib/corrosion # replace the vendored corrosion ...
+ cp -r --no-preserve=mode $corrosionSrc contrib/corrosion # ... with a writable copy of corrosionSrc
+
+ pushd contrib/corrosion/generator
+ cargoDeps="$corrosionDeps" cargoSetupPostUnpackHook # run the hook with corrosion's dependency tarball
+ corrosionDepsCopy="$cargoDepsCopy" # keep the value left in cargoDepsCopy; postPatch reads it back
+ popd
+
+ pushd rust
+ cargoDeps="$rustDeps" cargoSetupPostUnpackHook # second run, with the rust/ dependency tarball
+ rustDepsCopy="$cargoDepsCopy" # keep this run's cargoDepsCopy as well
+ cat .cargo/config >> .cargo/config.toml.in # append .cargo/config (presumably written by the hook - confirm) to the config template
+ cat .cargo/config >> skim/.cargo/config.toml.in # same for the skim config template
+ rm .cargo/config # the plain config file is removed once its content sits in the templates
+ popd
+
+ popd
+ '';
+
+  postPatch = ''
+    patchShebangs src/
+
+    # These scripts find the repository root through git; substitute $src for that command.
+    for scriptPath in \
+      src/Storages/System/StorageSystemLicenses.sh \
+      utils/check-style/check-duplicate-includes.sh \
+      utils/check-style/check-ungrouped-includes.sh \
+      utils/list-licenses/list-licenses.sh \
+      utils/check-style/check-style
+    do
+      substituteInPlace "$scriptPath" --replace 'git rev-parse --show-toplevel' '$src'
+    done
+  '' + lib.optionalString stdenv.isDarwin ''
+    # Rewrite gfind/ggrep in cmake/tools.cmake to the unprefixed find/grep.
+    sed -i -e 's|gfind|find|' -e 's|ggrep|grep|' cmake/tools.cmake
+  '' + lib.optionalString rustSupport ''
+    # Run the cargo post-patch hook once for each dependency copy saved in postUnpack.
+    pushd contrib/corrosion/generator
+    cargoDepsCopy="$corrosionDepsCopy" cargoSetupPostPatchHook
+    popd
+
+    pushd rust
+    cargoDepsCopy="$rustDepsCopy" cargoSetupPostPatchHook
+    popd
+    # Make the hook a no-op from here on (NOTE(review): presumably so it is not run again afterwards - confirm).
+    cargoSetupPostPatchHook() { true; }
+  '';
+
+ cmakeFlags = [
+ "-DENABLE_TESTS=OFF"
+ "-DCOMPILER_CACHE=disabled"
+ "-DENABLE_EMBEDDED_COMPILER=ON"
+ ];
+
+ # https://github.com/ClickHouse/ClickHouse/issues/49988
+ hardeningDisable = [ "fortify" ];
+
+  postInstall = ''
+    rm -rf $out/share/clickhouse-test
+
+    # Drop the file log, send the error log to the console and lower the log level to warning; whole XML elements are matched so nothing else in config.xml changes.
+    sed -i -e '\!<log>/var/log/clickhouse-server/clickhouse-server\.log</log>!d' \
+      $out/etc/clickhouse-server/config.xml
+    substituteInPlace $out/etc/clickhouse-server/config.xml \
+      --replace "<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>" "<console>1</console>" \
+      --replace "<level>trace</level>" "<level>warning</level>"
+  '';
+
+ # Builds in 7+h with 2 cores, and ~20m with a big-parallel builder.
+ requiredSystemFeatures = [ "big-parallel" ];
+
+ passthru.tests.clickhouse = nixosTests.clickhouse;
+
+ meta = with lib; {
+ homepage = "https://clickhouse.com";
+ description = "Column-oriented database management system";
+ license = licenses.asl20;
+ maintainers = with maintainers; [ orivej ];
+
+ # not supposed to work on 32-bit https://github.com/ClickHouse/ClickHouse/pull/23959#issuecomment-835343685
+ platforms = lib.filter (x: (lib.systems.elaborate x).is64bit) (platforms.linux ++ platforms.darwin);
+ broken = stdenv.buildPlatform != stdenv.hostPlatform;
+ };
+}
diff --git a/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch b/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch
index 59231dbbc..9e79aa726 100644
--- a/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch
+++ b/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch
@@ -1,41 +1,40 @@
-From 26e65e4addc990cc09b59b587792ac4a454e5cdd Mon Sep 17 00:00:00 2001
+From cdea2e8ad98995202ce81c9c030f2ae64d73b05a Mon Sep 17 00:00:00 2001
From: edef
Date: Mon, 30 Oct 2023 08:08:10 +0000
-Subject: [PATCH] [backport] Support reading arrow::LargeListArray
+Subject: [PATCH] Support reading arrow::LargeListArray
---
- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 35 ++++++++++++++-----
- 1 file changed, 26 insertions(+), 9 deletions(-)
+ .../Formats/Impl/ArrowColumnToCHColumn.cpp | 33 +++++++++++++++----
+ 1 file changed, 26 insertions(+), 7 deletions(-)
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
-index 54a6c8493ea..94cf59fd357 100644
+index 6f9d49498f2..b93846cd4eb 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
-@@ -336,7 +336,22 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr
+@@ -436,6 +436,22 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr
return nullmap_column;
}
--static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column)
-+template
++template
+struct ArrowOffsetArray;
+
-+template<>
++template <>
+struct ArrowOffsetArray
+{
+ using type = arrow::Int32Array;
+};
+
-+template<>
++template <>
+struct ArrowOffsetArray
+{
+ using type = arrow::Int64Array;
+};
+
-+template static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column)
++template
+ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column)
{
auto offsets_column = ColumnUInt64::create();
- ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData();
-@@ -346,9 +361,9 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptrnum_chunks(); chunk_i < num_chunks; ++chunk_i)
{
@@ -43,16 +42,16 @@ index 54a6c8493ea..94cf59fd357 100644
+ ArrowListArray & list_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i)));
auto arrow_offsets_array = list_chunk.offsets();
- auto & arrow_offsets = dynamic_cast(*arrow_offsets_array);
-+ auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array);
++ auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array);
/*
- * It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks.
-@@ -498,13 +513,13 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr
+ * CH uses element size as "offsets", while arrow uses actual offsets as offsets.
+@@ -602,13 +618,14 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr
}
}
--static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column)
-+template static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column)
++template
+ static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column)
{
arrow::ArrayVector array_vector;
array_vector.reserve(arrow_column->num_chunks());
@@ -63,13 +62,13 @@ index 54a6c8493ea..94cf59fd357 100644
/*
* It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks.
-@@ -636,12 +651,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
- if (map_type_hint)
- nested_type_hint = assert_cast(map_type_hint->getNestedType().get())->getNestedType();
+@@ -819,12 +836,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
+ key_type_hint = map_type_hint->getKeyType();
+ }
}
- auto arrow_nested_column = getNestedArrowColumn(arrow_column);
+ auto arrow_nested_column = getNestedArrowColumn(arrow_column);
- auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
+ auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint, true);
if (skipped)
return {};
@@ -78,7 +77,7 @@ index 54a6c8493ea..94cf59fd357 100644
const auto * tuple_column = assert_cast(nested_column.column.get());
const auto * tuple_type = assert_cast(nested_column.type.get());
-@@ -650,7 +665,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
+@@ -846,7 +863,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
return {std::move(map_column), std::move(map_type), column_name};
}
case arrow::Type::LIST:
@@ -88,13 +87,13 @@ index 54a6c8493ea..94cf59fd357 100644
DataTypePtr nested_type_hint;
if (type_hint)
{
-@@ -658,11 +675,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
+@@ -854,11 +873,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
if (array_type_hint)
nested_type_hint = array_type_hint->getNestedType();
}
- auto arrow_nested_column = getNestedArrowColumn(arrow_column);
+ auto arrow_nested_column = is_large ? getNestedArrowColumn(arrow_column) : getNestedArrowColumn(arrow_column);
- auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
+ auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint);
if (skipped)
return {};
- auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
diff --git a/third_party/overlays/tvl.nix b/third_party/overlays/tvl.nix
index 600161b66..fb2a89e7f 100644
--- a/third_party/overlays/tvl.nix
+++ b/third_party/overlays/tvl.nix
@@ -148,7 +148,12 @@ depot.nix.readTree.drvTargets {
};
};
- clickhouse = super.clickhouse.overrideAttrs (old: {
+ # We vendor this package for now, because the version upgrade involves a lot of changes.
+ # It cannot be merged upstream yet, because the Darwin build is broken:
+ # https://github.com/NixOS/nixpkgs/pull/267033
+ clickhouse = (super.callPackage ./clickhouse {
+ llvmPackages = super.llvmPackages_16;
+ }).overrideAttrs (old: {
patches = old.patches or [ ] ++ [
# https://github.com/ClickHouse/ClickHouse/pull/56118
./patches/clickhouse-support-reading-arrow-LargeListArray.patch