
Queries & Syntax

Learn about ClickHouse queries & syntax, including Joins & Window Functions.

1 - DateTime64

DateTime64 data type

Subtract fractional seconds

WITH toDateTime64('2021-09-07 13:41:50.926', 3) AS time
SELECT
    time - 1,
    time - 0.1 AS no_affect,
    time - toDecimal64(0.1, 3) AS uncorrect_result,
    time - toIntervalMillisecond(100) AS correct_result -- from 22.4

Query id: 696722bd-3c22-4270-babe-c6b124fee97f

┌──────────minus(time, 1)─┬───────────────no_affect─┬────────uncorrect_result─┬──────────correct_result─┐
│ 2021-09-07 13:41:49.926 │ 2021-09-07 13:41:50.926 │ 1970-01-01 00:00:00.000 │ 2021-09-07 13:41:50.826 │
└─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┘


WITH
    toDateTime64('2021-03-03 09:30:00.100', 3) AS time,
    fromUnixTimestamp64Milli(toInt64(toUnixTimestamp64Milli(time) + (1.25 * 1000))) AS first,
    toDateTime64(toDecimal64(time, 3) + toDecimal64('1.25', 3), 3) AS second,
    reinterpret(reinterpret(time, 'Decimal64(3)') + toDecimal64('1.25', 3), 'DateTime64(3)') AS third,
    time + toIntervalMillisecond(1250) AS fourth, -- from 22.4
    addMilliseconds(time, 1250) AS fifth          -- from 22.4
SELECT
    first,
    second,
    third,
    fourth,
    fifth

Query id: 176cd2e7-68bf-4e26-a492-63e0b5a87cc5

┌───────────────────first─┬──────────────────second─┬───────────────────third─┬──────────────────fourth─┬───────────────────fifth─┐
│ 2021-03-03 09:30:01.350 │ 2021-03-03 09:30:01.350 │ 2021-03-03 09:30:01.350 │ 2021-03-03 09:30:01.350 │ 2021-03-03 09:30:01.350 │
└─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────────────────┘

SET max_threads=1;

Starting from 22.4

WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    time + toIntervalMillisecond(1250) AS fourth
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(fourth)

1 rows in set. Elapsed: 0.215 sec. Processed 100.03 million rows, 800.21 MB (464.27 million rows/s., 3.71 GB/s.)

WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    addMilliseconds(time, 1250) AS fifth
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(fifth)

1 rows in set. Elapsed: 0.208 sec. Processed 100.03 million rows, 800.21 MB (481.04 million rows/s., 3.85 GB/s.)

###########

WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    fromUnixTimestamp64Milli(reinterpretAsInt64(toUnixTimestamp64Milli(time) + (1.25 * 1000))) AS first
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(first)

1 rows in set. Elapsed: 0.370 sec. Processed 100.03 million rows, 800.21 MB (270.31 million rows/s., 2.16 GB/s.)

WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    fromUnixTimestamp64Milli(toUnixTimestamp64Milli(time) + toInt16(1.25 * 1000)) AS first
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(first)

1 rows in set. Elapsed: 0.256 sec. Processed 100.03 million rows, 800.21 MB (391.06 million rows/s., 3.13 GB/s.)


WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    toDateTime64(toDecimal64(time, 3) + toDecimal64('1.25', 3), 3) AS second
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(second)

1 rows in set. Elapsed: 2.240 sec. Processed 100.03 million rows, 800.21 MB (44.65 million rows/s., 357.17 MB/s.)

SET decimal_check_overflow=0;

WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    toDateTime64(toDecimal64(time, 3) + toDecimal64('1.25', 3), 3) AS second
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(second)

1 rows in set. Elapsed: 1.991 sec. Processed 100.03 million rows, 800.21 MB (50.23 million rows/s., 401.81 MB/s.)


WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    reinterpret(reinterpret(time, 'Decimal64(3)') + toDecimal64('1.25', 3), 'DateTime64(3)') AS third
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(third)

1 rows in set. Elapsed: 0.515 sec. Processed 100.03 million rows, 800.21 MB (194.39 million rows/s., 1.56 GB/s.)

SET decimal_check_overflow=0;

WITH
    materialize(toDateTime64('2021-03-03 09:30:00.100', 3)) AS time,
    reinterpret(reinterpret(time, 'Decimal64(3)') + toDecimal64('1.25', 3), 'DateTime64(3)') AS third
SELECT count()
FROM numbers(100000000)
WHERE NOT ignore(third)

1 rows in set. Elapsed: 0.281 sec. Processed 100.03 million rows, 800.21 MB (356.21 million rows/s., 2.85 GB/s.)

2 - Imprecise parsing of literal Decimal or Float64

Imprecise parsing of literal Decimal or Float64

Decimal

SELECT
    9.2::Decimal64(2) AS postgresql_cast,
    toDecimal64(9.2, 2) AS to_function,
    CAST(9.2, 'Decimal64(2)') AS cast_float_literal,
    CAST('9.2', 'Decimal64(2)') AS cast_string_literal

┌─postgresql_cast─┬─to_function─┬─cast_float_literal─┬─cast_string_literal─┐
│             9.2 │        9.19 │               9.19 │                 9.2 │
└─────────────────┴─────────────┴────────────────────┴─────────────────────┘

Float64

SELECT
    toFloat64(15008753.) AS to_func,
    toFloat64('1.5008753E7') AS to_func_scientific,
    CAST('1.5008753E7', 'Float64') AS cast_scientific

┌──to_func─┬─to_func_scientific─┬────cast_scientific─┐
│ 15008753 │ 15008753.000000002 │ 15008753.000000002 │
└──────────┴────────────────────┴────────────────────┘

3 - Multiple aligned date columns in PARTITION BY expression

How to put multiple correlated date-like columns in a partition key without generating a lot of partitions when they don't match each other exactly.

An alternative is to use a minmax skip index.
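A rough sketch of that alternative (the table name and index definitions below are illustrative, not part of the example that follows):

CREATE TABLE part_key_minmax_idx
(
    `key` UInt32,
    `date` Date,
    `time` DateTime,
    `created_at` DateTime,
    `inserted_at` DateTime,
    INDEX created_at_minmax created_at TYPE minmax GRANULARITY 1,
    INDEX inserted_at_minmax inserted_at TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(date)
ORDER BY (key, time);

The approach of this article keeps the extra date-like columns inside the partition key instead: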

CREATE TABLE part_key_multiple_dates
(
    `key` UInt32,
    `date` Date,
    `time` DateTime,
    `created_at` DateTime,
    `inserted_at` DateTime
)
ENGINE = MergeTree
PARTITION BY (toYYYYMM(date), ignore(created_at), ignore(inserted_at))
ORDER BY (key, time);


INSERT INTO part_key_multiple_dates SELECT
    number,
    toDate(x),
    now() + intDiv(number, 10) AS x,
    x - (rand() % 100),
    x + (rand() % 100)
FROM numbers(100000000);

SELECT count()
FROM part_key_multiple_dates
WHERE date > (now() + toIntervalDay(105));

┌─count()─┐
│ 8434210 │
└─────────┘

1 rows in set. Elapsed: 0.022 sec. Processed 11.03 million rows, 22.05 MB (501.94 million rows/s., 1.00 GB/s.)

SELECT count()
FROM part_key_multiple_dates
WHERE inserted_at > (now() + toIntervalDay(105));

┌─count()─┐
│ 9279818 │
└─────────┘

1 rows in set. Elapsed: 0.046 sec. Processed 11.03 million rows, 44.10 MB (237.64 million rows/s., 950.57 MB/s.)

SELECT count()
FROM part_key_multiple_dates
WHERE created_at > (now() + toIntervalDay(105));

┌─count()─┐
│ 9279139 │
└─────────┘

1 rows in set. Elapsed: 0.043 sec. Processed 11.03 million rows, 44.10 MB (258.22 million rows/s., 1.03 GB/s.)

4 - Collecting query execution flamegraphs using system.trace_log

Collecting query execution flamegraph using trace_log

Collecting query execution flamegraph using system.trace_log

ClickHouse has embedded functionality to analyze the details of query performance.

It’s the system.trace_log table.

By default it collects information only about queries that run longer than 1 second (and it collects stacktraces every second).

You can adjust that per query using settings query_profiler_real_time_period_ns & query_profiler_cpu_time_period_ns.

Both work in a similar way (at the desired interval they dump the stacktraces of all the threads which execute the query). The real timer lets you ‘see’ the situations when the CPU was not doing much but time was spent, for example, on IO. The CPU timer shows the ‘hot’ points in calculations more accurately (it skips the IO time).

Trying to collect stacktraces with a frequency higher than a few KHz is usually not possible.

To check where most of the RAM is used you can collect stacktraces during memory allocations / deallocations by using the setting memory_profiler_sample_probability.
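A quick way to check what has been collected (just an illustrative query; in recent versions the trace_type column distinguishes Real / CPU / Memory samples):

SELECT trace_type, count()
FROM system.trace_log
WHERE event_date = today()
GROUP BY trace_type;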

clickhouse-speedscope

# install 
wget https://github.com/laplab/clickhouse-speedscope/archive/refs/heads/master.tar.gz -O clickhouse-speedscope.tar.gz
tar -xvzf clickhouse-speedscope.tar.gz
cd clickhouse-speedscope-master/
pip3 install -r requirements.txt

For debugging a particular query:

clickhouse-client 

SET query_profiler_cpu_time_period_ns=1000000; -- 1000 times per 'cpu' sec
-- or SET query_profiler_real_time_period_ns=2000000; -- 500 times per 'real' sec.
-- or SET memory_profiler_sample_probability=0.1; -- to debug the memory allocations

SELECT ... <your select>

SYSTEM FLUSH LOGS;

-- get the query_id from the clickhouse-client output or from system.query_log (also pay attention to query_id vs initial_query_id for distributed queries).
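-- For example, a sketch of looking it up (adjust the time window and filters to your case):
SELECT query_id, initial_query_id, query, event_time
FROM system.query_log
WHERE type = 'QueryFinish' AND event_time > now() - 600
ORDER BY event_time DESC
LIMIT 10;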

Now let’s process that:

python3 main.py &  # start the proxy in background
python3 main.py --query-id 908952ee-71a8-48a4-84d5-f4db92d45a5d # process the stacktraces
fg # get the proxy from background 
Ctrl + C  # stop it.

To access ClickHouse with a different username / password etc., see the sources of https://github.com/laplab/clickhouse-speedscope/blob/master/main.py

clickhouse-flamegraph

Installation & usage instructions: https://github.com/Slach/clickhouse-flamegraph

pure flamegraph.pl examples

git clone https://github.com/brendangregg/FlameGraph /opt/flamegraph

clickhouse-client -q "SELECT  arrayStringConcat(arrayReverse(arrayMap(x -> concat( addressToLine(x), '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, count() AS samples FROM system.trace_log WHERE event_time >= subtractMinutes(now(),10) GROUP BY trace FORMAT TabSeparated" | /opt/flamegraph/flamegraph.pl > flamegraph.svg

clickhouse-client -q "SELECT  arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, sum(abs(size)) AS samples FROM system.trace_log where trace_type = 'Memory' and event_date = today() group by trace order by samples desc FORMAT TabSeparated" | /opt/flamegraph/flamegraph.pl > allocs.svg
clickhouse-client -q "SELECT  arrayStringConcat(arrayReverse(arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, count() AS samples FROM system.trace_log where trace_type = 'Memory' group by trace FORMAT TabSeparated SETTINGS allow_introspection_functions=1" | /opt/flamegraph/flamegraph.pl > ~/mem1.svg

similar using perf

apt-get update -y 
apt-get install -y linux-tools-common linux-tools-generic linux-tools-`uname -r` git
apt-get install -y clickhouse-common-static-dbg clickhouse-common-dbg
mkdir -p /opt/flamegraph
git clone https://github.com/brendangregg/FlameGraph /opt/flamegraph

perf record -F 99 -p $(pidof clickhouse) -g
perf script > /tmp/out.perf
/opt/flamegraph/stackcollapse-perf.pl /tmp/out.perf | /opt/flamegraph/flamegraph.pl > /tmp/flamegraph.svg

also

https://kb.altinity.com/altinity-kb-queries-and-syntax/troubleshooting/#flamegraph

https://github.com/samber/grafana-flamegraph-panel/pull/2

5 - Using array functions to mimic window-function-like behavior

Using array functions to mimic window-function-like behavior.

Using array functions to mimic window-function-like behavior

There are some use cases when you may want to mimic window functions using arrays - as an optimization step, to control memory better / use on-disk spilling, or just because you have an old ClickHouse version.

Running difference sample

DROP TABLE IF EXISTS test_running_difference;

CREATE TABLE test_running_difference
ENGINE = Log AS
SELECT 
    number % 20 AS id, 
    toDateTime('2010-01-01 00:00:00') + (intDiv(number, 20) * 15) AS ts, 
    (number * round(xxHash32(number % 20) / 1000000)) - round(rand() / 1000000) AS val
FROM numbers(100)


SELECT * FROM test_running_difference
┌─id─┬──────────────────ts─┬────val─┐
│  0 │ 2010-01-01 00:00:00 │  -1209 │
│  1 │ 2010-01-01 00:00:00 │     43 │
│  2 │ 2010-01-01 00:00:00 │   4322 │
│  3 │ 2010-01-01 00:00:00 │    -25 │
│  4 │ 2010-01-01 00:00:00 │  13720 │
│  5 │ 2010-01-01 00:00:00 │    903 │
│  6 │ 2010-01-01 00:00:00 │  18062 │
│  7 │ 2010-01-01 00:00:00 │  -2873 │
│  8 │ 2010-01-01 00:00:00 │   6286 │
│  9 │ 2010-01-01 00:00:00 │  13399 │
│ 10 │ 2010-01-01 00:00:00 │  18320 │
│ 11 │ 2010-01-01 00:00:00 │  11731 │
│ 12 │ 2010-01-01 00:00:00 │    857 │
│ 13 │ 2010-01-01 00:00:00 │   8752 │
│ 14 │ 2010-01-01 00:00:00 │  23060 │
│ 15 │ 2010-01-01 00:00:00 │  41902 │
│ 16 │ 2010-01-01 00:00:00 │  39406 │
│ 17 │ 2010-01-01 00:00:00 │  50010 │
│ 18 │ 2010-01-01 00:00:00 │  57673 │
│ 19 │ 2010-01-01 00:00:00 │  51389 │
│  0 │ 2010-01-01 00:00:15 │  66839 │
│  1 │ 2010-01-01 00:00:15 │  19440 │
│  2 │ 2010-01-01 00:00:15 │  74513 │
│  3 │ 2010-01-01 00:00:15 │  10542 │
│  4 │ 2010-01-01 00:00:15 │  94245 │
│  5 │ 2010-01-01 00:00:15 │   8230 │
│  6 │ 2010-01-01 00:00:15 │  87823 │
│  7 │ 2010-01-01 00:00:15 │   -128 │
│  8 │ 2010-01-01 00:00:15 │  30101 │
│  9 │ 2010-01-01 00:00:15 │  54321 │
│ 10 │ 2010-01-01 00:00:15 │  64078 │
│ 11 │ 2010-01-01 00:00:15 │  31886 │
│ 12 │ 2010-01-01 00:00:15 │   8749 │
│ 13 │ 2010-01-01 00:00:15 │  28982 │
│ 14 │ 2010-01-01 00:00:15 │  61299 │
│ 15 │ 2010-01-01 00:00:15 │  95867 │
│ 16 │ 2010-01-01 00:00:15 │  93667 │
│ 17 │ 2010-01-01 00:00:15 │ 114072 │
│ 18 │ 2010-01-01 00:00:15 │ 124279 │
│ 19 │ 2010-01-01 00:00:15 │ 109605 │
│  0 │ 2010-01-01 00:00:30 │ 135082 │
│  1 │ 2010-01-01 00:00:30 │  37345 │
│  2 │ 2010-01-01 00:00:30 │ 148744 │
│  3 │ 2010-01-01 00:00:30 │  21607 │
│  4 │ 2010-01-01 00:00:30 │ 171744 │
│  5 │ 2010-01-01 00:00:30 │  14736 │
│  6 │ 2010-01-01 00:00:30 │ 155349 │
│  7 │ 2010-01-01 00:00:30 │  -3901 │
│  8 │ 2010-01-01 00:00:30 │  54303 │
│  9 │ 2010-01-01 00:00:30 │  89629 │
│ 10 │ 2010-01-01 00:00:30 │ 106595 │
│ 11 │ 2010-01-01 00:00:30 │  54545 │
│ 12 │ 2010-01-01 00:00:30 │  18903 │
│ 13 │ 2010-01-01 00:00:30 │  48023 │
│ 14 │ 2010-01-01 00:00:30 │  97930 │
│ 15 │ 2010-01-01 00:00:30 │ 152165 │
│ 16 │ 2010-01-01 00:00:30 │ 146130 │
│ 17 │ 2010-01-01 00:00:30 │ 174854 │
│ 18 │ 2010-01-01 00:00:30 │ 189194 │
│ 19 │ 2010-01-01 00:00:30 │ 170134 │
│  0 │ 2010-01-01 00:00:45 │ 207471 │
│  1 │ 2010-01-01 00:00:45 │  54323 │
│  2 │ 2010-01-01 00:00:45 │ 217984 │
│  3 │ 2010-01-01 00:00:45 │  31835 │
│  4 │ 2010-01-01 00:00:45 │ 252709 │
│  5 │ 2010-01-01 00:00:45 │  21493 │
│  6 │ 2010-01-01 00:00:45 │ 221271 │
│  7 │ 2010-01-01 00:00:45 │   -488 │
│  8 │ 2010-01-01 00:00:45 │  76827 │
│  9 │ 2010-01-01 00:00:45 │ 131066 │
│ 10 │ 2010-01-01 00:00:45 │ 149087 │
│ 11 │ 2010-01-01 00:00:45 │  71934 │
│ 12 │ 2010-01-01 00:00:45 │  25125 │
│ 13 │ 2010-01-01 00:00:45 │  65274 │
│ 14 │ 2010-01-01 00:00:45 │ 135980 │
│ 15 │ 2010-01-01 00:00:45 │ 210910 │
│ 16 │ 2010-01-01 00:00:45 │ 200007 │
│ 17 │ 2010-01-01 00:00:45 │ 235872 │
│ 18 │ 2010-01-01 00:00:45 │ 256112 │
│ 19 │ 2010-01-01 00:00:45 │ 229371 │
│  0 │ 2010-01-01 00:01:00 │ 275331 │
│  1 │ 2010-01-01 00:01:00 │  72668 │
│  2 │ 2010-01-01 00:01:00 │ 290366 │
│  3 │ 2010-01-01 00:01:00 │  46074 │
│  4 │ 2010-01-01 00:01:00 │ 329207 │
│  5 │ 2010-01-01 00:01:00 │  26770 │
│  6 │ 2010-01-01 00:01:00 │ 287619 │
│  7 │ 2010-01-01 00:01:00 │  -2207 │
│  8 │ 2010-01-01 00:01:00 │ 100456 │
│  9 │ 2010-01-01 00:01:00 │ 165688 │
│ 10 │ 2010-01-01 00:01:00 │ 194136 │
│ 11 │ 2010-01-01 00:01:00 │  94113 │
│ 12 │ 2010-01-01 00:01:00 │  35810 │
│ 13 │ 2010-01-01 00:01:00 │  85081 │
│ 14 │ 2010-01-01 00:01:00 │ 170256 │
│ 15 │ 2010-01-01 00:01:00 │ 265445 │
│ 16 │ 2010-01-01 00:01:00 │ 254828 │
│ 17 │ 2010-01-01 00:01:00 │ 297238 │
│ 18 │ 2010-01-01 00:01:00 │ 323494 │
│ 19 │ 2010-01-01 00:01:00 │ 286252 │
└────┴─────────────────────┴────────┘

100 rows in set. Elapsed: 0.003 sec. 

runningDifference works only within blocks & requires ordered data & is problematic when the group changes

select id, val, runningDifference(val) from (select * from test_running_difference order by id, ts);
┌─id─┬────val─┬─runningDifference(val)─┐
│  0 │  -1209 │                      0 │
│  0 │  66839 │                  68048 │
│  0 │ 135082 │                  68243 │
│  0 │ 207471 │                  72389 │
│  0 │ 275331 │                  67860 │
│  1 │     43 │                -275288 │
│  1 │  19440 │                  19397 │
│  1 │  37345 │                  17905 │
│  1 │  54323 │                  16978 │
│  1 │  72668 │                  18345 │
│  2 │   4322 │                 -68346 │
│  2 │  74513 │                  70191 │
│  2 │ 148744 │                  74231 │
│  2 │ 217984 │                  69240 │
│  2 │ 290366 │                  72382 │
│  3 │    -25 │                -290391 │
│  3 │  10542 │                  10567 │
│  3 │  21607 │                  11065 │
│  3 │  31835 │                  10228 │
│  3 │  46074 │                  14239 │
│  4 │  13720 │                 -32354 │
│  4 │  94245 │                  80525 │
│  4 │ 171744 │                  77499 │
│  4 │ 252709 │                  80965 │
│  4 │ 329207 │                  76498 │
│  5 │    903 │                -328304 │
│  5 │   8230 │                   7327 │
│  5 │  14736 │                   6506 │
│  5 │  21493 │                   6757 │
│  5 │  26770 │                   5277 │
│  6 │  18062 │                  -8708 │
│  6 │  87823 │                  69761 │
│  6 │ 155349 │                  67526 │
│  6 │ 221271 │                  65922 │
│  6 │ 287619 │                  66348 │
│  7 │  -2873 │                -290492 │
│  7 │   -128 │                   2745 │
│  7 │  -3901 │                  -3773 │
│  7 │   -488 │                   3413 │
│  7 │  -2207 │                  -1719 │
│  8 │   6286 │                   8493 │
│  8 │  30101 │                  23815 │
│  8 │  54303 │                  24202 │
│  8 │  76827 │                  22524 │
│  8 │ 100456 │                  23629 │
│  9 │  13399 │                 -87057 │
│  9 │  54321 │                  40922 │
│  9 │  89629 │                  35308 │
│  9 │ 131066 │                  41437 │
│  9 │ 165688 │                  34622 │
│ 10 │  18320 │                -147368 │
│ 10 │  64078 │                  45758 │
│ 10 │ 106595 │                  42517 │
│ 10 │ 149087 │                  42492 │
│ 10 │ 194136 │                  45049 │
│ 11 │  11731 │                -182405 │
│ 11 │  31886 │                  20155 │
│ 11 │  54545 │                  22659 │
│ 11 │  71934 │                  17389 │
│ 11 │  94113 │                  22179 │
│ 12 │    857 │                 -93256 │
│ 12 │   8749 │                   7892 │
│ 12 │  18903 │                  10154 │
│ 12 │  25125 │                   6222 │
│ 12 │  35810 │                  10685 │
│ 13 │   8752 │                 -27058 │
│ 13 │  28982 │                  20230 │
│ 13 │  48023 │                  19041 │
│ 13 │  65274 │                  17251 │
│ 13 │  85081 │                  19807 │
│ 14 │  23060 │                 -62021 │
│ 14 │  61299 │                  38239 │
│ 14 │  97930 │                  36631 │
│ 14 │ 135980 │                  38050 │
│ 14 │ 170256 │                  34276 │
│ 15 │  41902 │                -128354 │
│ 15 │  95867 │                  53965 │
│ 15 │ 152165 │                  56298 │
│ 15 │ 210910 │                  58745 │
│ 15 │ 265445 │                  54535 │
│ 16 │  39406 │                -226039 │
│ 16 │  93667 │                  54261 │
│ 16 │ 146130 │                  52463 │
│ 16 │ 200007 │                  53877 │
│ 16 │ 254828 │                  54821 │
│ 17 │  50010 │                -204818 │
│ 17 │ 114072 │                  64062 │
│ 17 │ 174854 │                  60782 │
│ 17 │ 235872 │                  61018 │
│ 17 │ 297238 │                  61366 │
│ 18 │  57673 │                -239565 │
│ 18 │ 124279 │                  66606 │
│ 18 │ 189194 │                  64915 │
│ 18 │ 256112 │                  66918 │
│ 18 │ 323494 │                  67382 │
│ 19 │  51389 │                -272105 │
│ 19 │ 109605 │                  58216 │
│ 19 │ 170134 │                  60529 │
│ 19 │ 229371 │                  59237 │
│ 19 │ 286252 │                  56881 │
└────┴────────┴────────────────────────┘

100 rows in set. Elapsed: 0.005 sec. 

Arrays !

1. Group & Collect the data into array

You can collect several columns by building an array of tuples:

SELECT 
    id, 
    groupArray(tuple(ts, val))
FROM test_running_difference
GROUP BY id

┌─id─┬─groupArray(tuple(ts, val))──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│  0 │ [('2010-01-01 00:00:00',-1209),('2010-01-01 00:00:15',66839),('2010-01-01 00:00:30',135082),('2010-01-01 00:00:45',207471),('2010-01-01 00:01:00',275331)]  │
│  1 │ [('2010-01-01 00:00:00',43),('2010-01-01 00:00:15',19440),('2010-01-01 00:00:30',37345),('2010-01-01 00:00:45',54323),('2010-01-01 00:01:00',72668)]        │
│  2 │ [('2010-01-01 00:00:00',4322),('2010-01-01 00:00:15',74513),('2010-01-01 00:00:30',148744),('2010-01-01 00:00:45',217984),('2010-01-01 00:01:00',290366)]   │
│  3 │ [('2010-01-01 00:00:00',-25),('2010-01-01 00:00:15',10542),('2010-01-01 00:00:30',21607),('2010-01-01 00:00:45',31835),('2010-01-01 00:01:00',46074)]       │
│  4 │ [('2010-01-01 00:00:00',13720),('2010-01-01 00:00:15',94245),('2010-01-01 00:00:30',171744),('2010-01-01 00:00:45',252709),('2010-01-01 00:01:00',329207)]  │
│  5 │ [('2010-01-01 00:00:00',903),('2010-01-01 00:00:15',8230),('2010-01-01 00:00:30',14736),('2010-01-01 00:00:45',21493),('2010-01-01 00:01:00',26770)]        │
│  6 │ [('2010-01-01 00:00:00',18062),('2010-01-01 00:00:15',87823),('2010-01-01 00:00:30',155349),('2010-01-01 00:00:45',221271),('2010-01-01 00:01:00',287619)]  │
│  7 │ [('2010-01-01 00:00:00',-2873),('2010-01-01 00:00:15',-128),('2010-01-01 00:00:30',-3901),('2010-01-01 00:00:45',-488),('2010-01-01 00:01:00',-2207)]       │
│  8 │ [('2010-01-01 00:00:00',6286),('2010-01-01 00:00:15',30101),('2010-01-01 00:00:30',54303),('2010-01-01 00:00:45',76827),('2010-01-01 00:01:00',100456)]     │
│  9 │ [('2010-01-01 00:00:00',13399),('2010-01-01 00:00:15',54321),('2010-01-01 00:00:30',89629),('2010-01-01 00:00:45',131066),('2010-01-01 00:01:00',165688)]   │
│ 10 │ [('2010-01-01 00:00:00',18320),('2010-01-01 00:00:15',64078),('2010-01-01 00:00:30',106595),('2010-01-01 00:00:45',149087),('2010-01-01 00:01:00',194136)]  │
│ 11 │ [('2010-01-01 00:00:00',11731),('2010-01-01 00:00:15',31886),('2010-01-01 00:00:30',54545),('2010-01-01 00:00:45',71934),('2010-01-01 00:01:00',94113)]     │
│ 12 │ [('2010-01-01 00:00:00',857),('2010-01-01 00:00:15',8749),('2010-01-01 00:00:30',18903),('2010-01-01 00:00:45',25125),('2010-01-01 00:01:00',35810)]        │
│ 13 │ [('2010-01-01 00:00:00',8752),('2010-01-01 00:00:15',28982),('2010-01-01 00:00:30',48023),('2010-01-01 00:00:45',65274),('2010-01-01 00:01:00',85081)]      │
│ 14 │ [('2010-01-01 00:00:00',23060),('2010-01-01 00:00:15',61299),('2010-01-01 00:00:30',97930),('2010-01-01 00:00:45',135980),('2010-01-01 00:01:00',170256)]   │
│ 15 │ [('2010-01-01 00:00:00',41902),('2010-01-01 00:00:15',95867),('2010-01-01 00:00:30',152165),('2010-01-01 00:00:45',210910),('2010-01-01 00:01:00',265445)]  │
│ 16 │ [('2010-01-01 00:00:00',39406),('2010-01-01 00:00:15',93667),('2010-01-01 00:00:30',146130),('2010-01-01 00:00:45',200007),('2010-01-01 00:01:00',254828)]  │
│ 17 │ [('2010-01-01 00:00:00',50010),('2010-01-01 00:00:15',114072),('2010-01-01 00:00:30',174854),('2010-01-01 00:00:45',235872),('2010-01-01 00:01:00',297238)] │
│ 18 │ [('2010-01-01 00:00:00',57673),('2010-01-01 00:00:15',124279),('2010-01-01 00:00:30',189194),('2010-01-01 00:00:45',256112),('2010-01-01 00:01:00',323494)] │
│ 19 │ [('2010-01-01 00:00:00',51389),('2010-01-01 00:00:15',109605),('2010-01-01 00:00:30',170134),('2010-01-01 00:00:45',229371),('2010-01-01 00:01:00',286252)] │
└────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

Do needed ordering in each array

For example - by second element of tuple:

SELECT 
    id, 
    arraySort(x -> (x.2), groupArray((ts, val)))
FROM test_running_difference
GROUP BY id

┌─id─┬─arraySort(lambda(tuple(x), tupleElement(x, 2)), groupArray(tuple(ts, val)))─────────────────────────────────────────────────────────────────────────────────┐
│  0 │ [('2010-01-01 00:00:00',-1209),('2010-01-01 00:00:15',66839),('2010-01-01 00:00:30',135082),('2010-01-01 00:00:45',207471),('2010-01-01 00:01:00',275331)]  │
│  1 │ [('2010-01-01 00:00:00',43),('2010-01-01 00:00:15',19440),('2010-01-01 00:00:30',37345),('2010-01-01 00:00:45',54323),('2010-01-01 00:01:00',72668)]        │
│  2 │ [('2010-01-01 00:00:00',4322),('2010-01-01 00:00:15',74513),('2010-01-01 00:00:30',148744),('2010-01-01 00:00:45',217984),('2010-01-01 00:01:00',290366)]   │
│  3 │ [('2010-01-01 00:00:00',-25),('2010-01-01 00:00:15',10542),('2010-01-01 00:00:30',21607),('2010-01-01 00:00:45',31835),('2010-01-01 00:01:00',46074)]       │
│  4 │ [('2010-01-01 00:00:00',13720),('2010-01-01 00:00:15',94245),('2010-01-01 00:00:30',171744),('2010-01-01 00:00:45',252709),('2010-01-01 00:01:00',329207)]  │
│  5 │ [('2010-01-01 00:00:00',903),('2010-01-01 00:00:15',8230),('2010-01-01 00:00:30',14736),('2010-01-01 00:00:45',21493),('2010-01-01 00:01:00',26770)]        │
│  6 │ [('2010-01-01 00:00:00',18062),('2010-01-01 00:00:15',87823),('2010-01-01 00:00:30',155349),('2010-01-01 00:00:45',221271),('2010-01-01 00:01:00',287619)]  │
│  7 │ [('2010-01-01 00:00:30',-3901),('2010-01-01 00:00:00',-2873),('2010-01-01 00:01:00',-2207),('2010-01-01 00:00:45',-488),('2010-01-01 00:00:15',-128)]       │
│  8 │ [('2010-01-01 00:00:00',6286),('2010-01-01 00:00:15',30101),('2010-01-01 00:00:30',54303),('2010-01-01 00:00:45',76827),('2010-01-01 00:01:00',100456)]     │
│  9 │ [('2010-01-01 00:00:00',13399),('2010-01-01 00:00:15',54321),('2010-01-01 00:00:30',89629),('2010-01-01 00:00:45',131066),('2010-01-01 00:01:00',165688)]   │
│ 10 │ [('2010-01-01 00:00:00',18320),('2010-01-01 00:00:15',64078),('2010-01-01 00:00:30',106595),('2010-01-01 00:00:45',149087),('2010-01-01 00:01:00',194136)]  │
│ 11 │ [('2010-01-01 00:00:00',11731),('2010-01-01 00:00:15',31886),('2010-01-01 00:00:30',54545),('2010-01-01 00:00:45',71934),('2010-01-01 00:01:00',94113)]     │
│ 12 │ [('2010-01-01 00:00:00',857),('2010-01-01 00:00:15',8749),('2010-01-01 00:00:30',18903),('2010-01-01 00:00:45',25125),('2010-01-01 00:01:00',35810)]        │
│ 13 │ [('2010-01-01 00:00:00',8752),('2010-01-01 00:00:15',28982),('2010-01-01 00:00:30',48023),('2010-01-01 00:00:45',65274),('2010-01-01 00:01:00',85081)]      │
│ 14 │ [('2010-01-01 00:00:00',23060),('2010-01-01 00:00:15',61299),('2010-01-01 00:00:30',97930),('2010-01-01 00:00:45',135980),('2010-01-01 00:01:00',170256)]   │
│ 15 │ [('2010-01-01 00:00:00',41902),('2010-01-01 00:00:15',95867),('2010-01-01 00:00:30',152165),('2010-01-01 00:00:45',210910),('2010-01-01 00:01:00',265445)]  │
│ 16 │ [('2010-01-01 00:00:00',39406),('2010-01-01 00:00:15',93667),('2010-01-01 00:00:30',146130),('2010-01-01 00:00:45',200007),('2010-01-01 00:01:00',254828)]  │
│ 17 │ [('2010-01-01 00:00:00',50010),('2010-01-01 00:00:15',114072),('2010-01-01 00:00:30',174854),('2010-01-01 00:00:45',235872),('2010-01-01 00:01:00',297238)] │
│ 18 │ [('2010-01-01 00:00:00',57673),('2010-01-01 00:00:15',124279),('2010-01-01 00:00:30',189194),('2010-01-01 00:00:45',256112),('2010-01-01 00:01:00',323494)] │
│ 19 │ [('2010-01-01 00:00:00',51389),('2010-01-01 00:00:15',109605),('2010-01-01 00:00:30',170134),('2010-01-01 00:00:45',229371),('2010-01-01 00:01:00',286252)] │
└────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

20 rows in set. Elapsed: 0.004 sec. 

That can be rewritten like this:

WITH 
    groupArray(tuple(ts, val)) as window_rows,
    arraySort(x -> x.1, window_rows) as sorted_window_rows
SELECT 
    id, 
    sorted_window_rows
FROM test_running_difference
GROUP BY id

Apply needed logic arrayMap / arrayDifference etc

WITH 
    groupArray(tuple(ts, val)) as window_rows,
    arraySort(x -> x.1, window_rows) as sorted_window_rows,
    arrayMap(x -> x.2, sorted_window_rows) as sorted_window_rows_val_column,
	arrayDifference(sorted_window_rows_val_column) as sorted_window_rows_val_column_diff
SELECT 
    id, 
    sorted_window_rows_val_column_diff
FROM test_running_difference
GROUP BY id
┌─id─┬─sorted_window_rows_val_column_diff─┐
│  0 │ [0,68048,68243,72389,67860]        │
│  1 │ [0,19397,17905,16978,18345]        │
│  2 │ [0,70191,74231,69240,72382]        │
│  3 │ [0,10567,11065,10228,14239]        │
│  4 │ [0,80525,77499,80965,76498]        │
│  5 │ [0,7327,6506,6757,5277]            │
│  6 │ [0,69761,67526,65922,66348]        │
│  7 │ [0,2745,-3773,3413,-1719]          │
│  8 │ [0,23815,24202,22524,23629]        │
│  9 │ [0,40922,35308,41437,34622]        │
│ 10 │ [0,45758,42517,42492,45049]        │
│ 11 │ [0,20155,22659,17389,22179]        │
│ 12 │ [0,7892,10154,6222,10685]          │
│ 13 │ [0,20230,19041,17251,19807]        │
│ 14 │ [0,38239,36631,38050,34276]        │
│ 15 │ [0,53965,56298,58745,54535]        │
│ 16 │ [0,54261,52463,53877,54821]        │
│ 17 │ [0,64062,60782,61018,61366]        │
│ 18 │ [0,66606,64915,66918,67382]        │
│ 19 │ [0,58216,60529,59237,56881]        │
└────┴────────────────────────────────────┘

20 rows in set. Elapsed: 0.005 sec. 

You can also do a lot of magic with arrayEnumerate, accessing different values by their indexes.
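For example, a sketch of a ‘lag’-like calculation via array indexes (window_rows / sorted_window_rows follow the queries above; vals and vals_with_lag are new aliases, and the zero used for the first element is an arbitrary filler):

WITH 
    groupArray(tuple(ts, val)) as window_rows,
    arraySort(x -> x.1, window_rows) as sorted_window_rows,
    arrayMap(x -> x.2, sorted_window_rows) as vals,
    arrayMap(i -> (vals[i], if(i > 1, vals[i - 1], 0)), arrayEnumerate(vals)) as vals_with_lag
SELECT 
    id, 
    vals_with_lag
FROM test_running_difference
GROUP BY id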

Now you can return your arrays back to rows

use arrayJoin

WITH 
    groupArray(tuple(ts, val)) as window_rows,
    arraySort(x -> x.1, window_rows) as sorted_window_rows,
    arrayMap(x -> x.2, sorted_window_rows) as sorted_window_rows_val_column,
	arrayDifference(sorted_window_rows_val_column) as sorted_window_rows_val_column_diff,
	arrayJoin(sorted_window_rows_val_column_diff) as diff
SELECT 
    id, 
    diff
FROM test_running_difference
GROUP BY id

or ARRAY JOIN

SELECT 
  id,
  diff,
  ts
FROM 
(
WITH 
    groupArray(tuple(ts, val)) as window_rows,
    arraySort(x -> x.1, window_rows) as sorted_window_rows,
    arrayMap(x -> x.2, sorted_window_rows) as sorted_window_rows_val_column
SELECT 
    id, 
	arrayDifference(sorted_window_rows_val_column) as sorted_window_rows_val_column_diff,
    arrayMap(x -> x.1, sorted_window_rows) as sorted_window_rows_ts_column
FROM test_running_difference
GROUP BY id
) as t1
ARRAY JOIN sorted_window_rows_val_column_diff as diff, sorted_window_rows_ts_column as ts

etc.

6 - -State & -Merge combinators

-State & -Merge combinators

-State combinator doesn’t actually store information about the -If combinator, so aggregate functions with and without -If have the same serialized data.

$ clickhouse-local --query "SELECT maxIfState(number,number % 2) as x, maxState(number) as y FROM numbers(10) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(max,UInt64), y AggregateFunction(max,UInt64)" --query "SELECT maxMerge(x), maxMerge(y) FROM table"
9       9
$ clickhouse-local --query "SELECT maxIfState(number,number % 2) as x, maxState(number) as y FROM numbers(11) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(max,UInt64), y AggregateFunction(max,UInt64)" --query "SELECT maxMerge(x), maxMerge(y) FROM table"
9       10

The -State combinator has the same serialized data footprint regardless of the parameters used in the definition of the aggregate function. That’s true for quantile* and sequenceMatch/sequenceCount functions.

$ clickhouse-local --query "SELECT quantilesTDigestIfState(0.1,0.9)(number,number % 2) FROM  numbers(1000000) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(quantileTDigestWeighted(0.5),UInt64,UInt8)" --query "SELECT quantileTDigestWeightedMerge(0.4)(x) FROM table"
400000

$ clickhouse-local --query "SELECT quantilesTDigestIfState(0.1,0.9)(number,number % 2) FROM  numbers(1000000) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(quantilesTDigestWeighted(0.5),UInt64,UInt8)" --query "SELECT quantilesTDigestWeightedMerge(0.4,0.8)(x) FROM table"
[400000,800000]

SELECT quantileMerge(0.9)(x)
FROM
(
    SELECT quantileState(0.1)(number) AS x
    FROM numbers(1000)
)

┌─quantileMerge(0.9)(x)─┐
│                 899.1 │
└───────────────────────┘
SELECT
    sequenceMatchMerge('(?2)(?3)')(x) AS `2_3`,
    sequenceMatchMerge('(?1)(?4)')(x) AS `1_4`,
    sequenceMatchMerge('(?1)(?2)(?3)')(x) AS `1_2_3`
FROM
(
    SELECT sequenceMatchState('(?1)(?2)(?3)')(number, number = 8, number = 5, number = 6, number = 9) AS x
    FROM numbers(10)
)

┌─2_3─┬─1_4─┬─1_2_3─┐
│   1 │   1 │     0 │
└─────┴─────┴───────┘

SELECT
    sequenceCountMerge('(?1)(?2)')(x) AS `2_3`,
    sequenceCountMerge('(?1)(?4)')(x) AS `1_4`,
    sequenceCountMerge('(?1)(?2)(?3)')(x) AS `1_2_3`
FROM
(
    WITH number % 4 AS cond
    SELECT sequenceCountState('(?1)(?2)(?3)')(number, cond = 1, cond = 2, cond = 3, cond = 5) AS x
    FROM numbers(11)
)

┌─2_3─┬─1_4─┬─1_2_3─┐
│   3 │   0 │     2 │
└─────┴─────┴───────┘

7 - ALTER MODIFY COLUMN is stuck, the column is inaccessible.

ALTER MODIFY COLUMN is stuck, the column is inaccessible.

Problem

You have a table:

CREATE TABLE modify_column(column_n String) ENGINE=MergeTree() ORDER BY tuple();

Populate it with data:

INSERT INTO modify_column VALUES ('key_a');
INSERT INTO modify_column VALUES ('key_b');
INSERT INTO modify_column VALUES ('key_c');

You tried to apply an ALTER TABLE query changing the column type:

ALTER TABLE modify_column MODIFY COLUMN column_n Enum8('key_a'=1, 'key_b'=2);

But it didn’t succeed, and you see an error in the system.mutations table:

SELECT *
FROM system.mutations
WHERE (table = 'modify_column') AND (is_done = 0)
FORMAT Vertical

Row 1:
──────
database:                   default
table:                      modify_column
mutation_id:                mutation_4.txt
command:                    MODIFY COLUMN `column_n` Enum8('key_a' = 1, 'key_b' = 2)
create_time:                2021-03-03 18:38:09
block_numbers.partition_id: ['']
block_numbers.number:       [4]
parts_to_do_names:          ['all_3_3_0']
parts_to_do:                1
is_done:                    0
latest_failed_part:         all_3_3_0
latest_fail_time:           2021-03-03 18:38:59
latest_fail_reason:         Code: 36, e.displayText() = DB::Exception: Unknown element 'key_c' for type Enum8('key_a' = 1, 'key_b' = 2): while executing 'FUNCTION CAST(column_n :: 0, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)' :: 1) -> cast(column_n, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)') Enum8('key_a' = 1, 'key_b' = 2) : 2': (while reading from part /var/lib/clickhouse/data/default/modify_column/all_3_3_0/): While executing MergeTree (version 21.3.1.6041)

And you can’t query that column anymore:

SELECT column_n
FROM modify_column

┌─column_n─┐
│ key_a    │
└──────────┘
┌─column_n─┐
│ key_b    │
└──────────┘
 Progress: 2.00 rows, 2.00 B (19.48 rows/s., 19.48 B/s.)
2 rows in set. Elapsed: 0.104 sec.

Received exception from server (version 21.3.1):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Unknown element 'key_c' for type Enum8('key_a' = 1, 'key_b' = 2): while executing 'FUNCTION CAST(column_n :: 0, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)' :: 1) -> cast(column_n, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)') Enum8('key_a' = 1, 'key_b' = 2) : 2': (while reading from part /var/lib/clickhouse/data/default/modify_column/all_3_3_0/): While executing MergeTreeThread.

Solution

You should do the following:

Check which mutation is stuck and kill it:

SELECT * FROM system.mutations WHERE table = 'modify_column' AND is_done=0 FORMAT Vertical;
KILL MUTATION WHERE table = 'modify_column' AND mutation_id = 'id_of_stuck_mutation';

Apply a reverting MODIFY COLUMN query to convert the table back to the previous column type:

ALTER TABLE modify_column MODIFY COLUMN column_n String;

Check that the column is accessible now:

SELECT column_n, count() FROM modify_column GROUP BY column_n;

Run the fixed ALTER MODIFY COLUMN query:

ALTER TABLE modify_column MODIFY COLUMN column_n Enum8('key_a'=1, 'key_b'=2, 'key_c'=3);

You can monitor the progress of the column type change with the system.mutations or system.parts_columns tables:

SELECT
    command,
    parts_to_do,
    is_done
FROM system.mutations
WHERE table = 'modify_column'

SELECT
    column,
    type,
    count() AS parts,
    sum(rows) AS rows,
    sum(bytes_on_disk) AS bytes
FROM system.parts_columns
WHERE (table = 'modify_column') AND (column = 'column_n') AND active
GROUP BY
    column,
    type

8 - ANSI SQL mode

ANSI SQL mode

It’s possible to tune some settings which make ClickHouse more ANSI SQL compatible (and slower):

SET join_use_nulls=1; -- introduced long ago
SET cast_keep_nullable=1; -- introduced in 20.5
SET union_default_mode='DISTINCT'; -- introduced in 21.1
SET allow_experimental_window_functions=1; -- introduced in 21.3
SET prefer_column_name_to_alias=1; -- introduced in 21.4;

9 - Atomic insert

Atomic insert

An insert is atomic if it creates only one part.

An insert will create one part if:

  • Data is inserted directly into a MergeTree table
  • Data is inserted into a single partition.
  • For INSERT FORMAT:
    • Number of rows is less than max_insert_block_size (default is 1048545)
    • Parallel formatting is disabled (for TSV, TSKV, CSV, and JSONEachRow formats the setting input_format_parallel_parsing=0 is set).
  • For INSERT SELECT:
    • Number of rows is less than max_block_size (see the sketch after the links below)
  • Smaller blocks are properly squashed up to the configured block size (min_insert_block_size_rows and min_insert_block_size_bytes)
  • The MergeTree table doesn’t have Materialized Views (there is no atomicity Table <> MV)

https://github.com/ClickHouse/ClickHouse/issues/9195#issuecomment-587500824 https://github.com/ClickHouse/ClickHouse/issues/5148#issuecomment-487757235
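For the INSERT SELECT case the same idea can be expressed with per-query settings; a minimal sketch (source_table / target_table here are placeholders):

INSERT INTO target_table SELECT * FROM source_table
SETTINGS max_block_size = 1000000000,
         min_insert_block_size_rows = 1000000000,
         min_insert_block_size_bytes = 0;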

Example how to make a large insert atomically

Generate test data in Native and TSV format (100 million rows)

Text formats and the Native format require different sets of settings; here I want to find / demonstrate the mandatory minimum of settings for any case.

clickhouse-client -q \
     'select toInt64(number) A, toString(number) S from numbers(100000000) format Native' > t.native
clickhouse-client -q \
     'select toInt64(number) A, toString(number) S from numbers(100000000) format TSV' > t.tsv

Insert with default settings (not atomic)

drop table if exists trg;
create table trg(A Int64, S String) Engine=MergeTree order by A;

-- Load data in Native format
clickhouse-client  -q 'insert into trg format Native' <t.native

-- Check how many parts are created
SELECT 
    count(),
    min(rows),
    max(rows),
    sum(rows)
FROM system.parts
WHERE (level = 0) AND (table = 'trg');
┌─count()─┬─min(rows)─┬─max(rows)─┬─sum(rows)─┐
│      90 │    890935 │   1113585 │ 100000000 │
└─────────┴───────────┴───────────┴───────────┘

--- 90 parts were created - not atomic



drop table if exists trg;
create table trg(A Int64, S String) Engine=MergeTree order by A;

-- Load data in TSV format
clickhouse-client  -q 'insert into trg format TSV' <t.tsv

-- Check how many parts are created
SELECT 
    count(),
    min(rows),
    max(rows),
    sum(rows)
FROM system.parts
WHERE (level = 0) AND (table = 'trg');
┌─count()─┬─min(rows)─┬─max(rows)─┬─sum(rows)─┐
│      85 │    898207 │   1449610 │ 100000000 │
└─────────┴───────────┴───────────┴───────────┘

--- 85 parts were created - not atomic

Insert with adjusted settings (atomic)

An atomic insert uses more memory because it needs 100 million rows in memory.

drop table if exists trg;
create table trg(A Int64, S String) Engine=MergeTree order by A;

clickhouse-client --input_format_parallel_parsing=0 \
                  --min_insert_block_size_bytes=0 \
                  --min_insert_block_size_rows=1000000000 \
                  --max_insert_block_size=1000000000  \
                  -q 'insert into trg format Native' <t.native

-- Check that only one part is created
SELECT
    count(),
    min(rows),
    max(rows),
    sum(rows)
FROM system.parts
WHERE (level = 0) AND (table = 'trg');
┌─count()─┬─min(rows)─┬─max(rows)─┬─sum(rows)─┐
│       1 │ 100000000 │ 100000000 │ 100000000 │
└─────────┴───────────┴───────────┴───────────┘

-- 1 part, success.



drop table if exists trg;
create table trg(A Int64, S String) Engine=MergeTree order by A;

-- Load data in TSV format
clickhouse-client --input_format_parallel_parsing=0 \
                  --min_insert_block_size_bytes=0 \
                  --min_insert_block_size_rows=1000000000 \
                  --max_insert_block_size=1000000000  \
                  -q 'insert into trg format TSV' <t.tsv

-- Check that only one part is created
SELECT 
    count(),
    min(rows),
    max(rows),
    sum(rows)
FROM system.parts
WHERE (level = 0) AND (table = 'trg');
┌─count()─┬─min(rows)─┬─max(rows)─┬─sum(rows)─┐
│       1 │ 100000000 │ 100000000 │ 100000000 │
└─────────┴───────────┴───────────┴───────────┘

-- 1 part, success.

10 - Cumulative Anything

Cumulative Anything

Sample data

CREATE TABLE events
(
    `ts` DateTime,
    `user_id` UInt32
)
ENGINE = Memory;

INSERT INTO events SELECT
    toDateTime('2021-04-29 10:10:10') + toIntervalHour(7 * number) AS ts,
    toDayOfWeek(ts) + (number % 2) AS user_id
FROM numbers(15);

Using arrays

WITH
    groupArray(_ts) AS ts_arr,
    groupArray(state) AS state_arr
SELECT
    arrayJoin(ts_arr) AS ts,
    arrayReduce('uniqExactMerge', arrayFilter((x, y) -> (y <= ts), state_arr, ts_arr)) AS uniq
FROM
(
    SELECT
        toStartOfDay(ts) AS _ts,
        uniqExactState(user_id) AS state
    FROM events
    GROUP BY _ts
)
ORDER BY ts ASC

┌──────────────────ts─┬─uniq─┐
│ 2021-04-29 00:00:00 │    2 │
│ 2021-04-30 00:00:00 │    3 │
│ 2021-05-01 00:00:00 │    4 │
│ 2021-05-02 00:00:00 │    5 │
│ 2021-05-03 00:00:00 │    7 │
└─────────────────────┴──────┘

WITH arrayJoin(range(toUInt32(_ts) AS int, least(int + toUInt32((3600 * 24) * 5), toUInt32(toDateTime('2021-05-04 00:00:00'))), 3600 * 24)) AS ts_expanded
SELECT
    toDateTime(ts_expanded) AS ts,
    uniqExactMerge(state) AS uniq
FROM
(
    SELECT
        toStartOfDay(ts) AS _ts,
        uniqExactState(user_id) AS state
    FROM events
    GROUP BY _ts
)
GROUP BY ts
ORDER BY ts ASC

┌──────────────────ts─┬─uniq─┐
│ 2021-04-29 00:00:00 │    2 │
│ 2021-04-30 00:00:00 │    3 │
│ 2021-05-01 00:00:00 │    4 │
│ 2021-05-02 00:00:00 │    5 │
│ 2021-05-03 00:00:00 │    7 │
└─────────────────────┴──────┘

Using window functions (starting from ClickHouse 21.3)

SELECT
    ts,
    uniqExactMerge(state) OVER (ORDER BY ts ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS uniq
FROM
(
    SELECT
        toStartOfDay(ts) AS ts,
        uniqExactState(user_id) AS state
    FROM events
    GROUP BY ts
)
ORDER BY ts ASC

┌──────────────────ts─┬─uniq─┐
│ 2021-04-29 00:00:00 │    2 │
│ 2021-04-30 00:00:00 │    3 │
│ 2021-05-01 00:00:00 │    4 │
│ 2021-05-02 00:00:00 │    5 │
│ 2021-05-03 00:00:00 │    7 │
└─────────────────────┴──────┘

Using runningAccumulate (incorrect result over blocks)

SELECT
    ts,
    runningAccumulate(state) AS uniq
FROM
(
    SELECT
        toStartOfDay(ts) AS ts,
        uniqExactState(user_id) AS state
    FROM events
    GROUP BY ts
    ORDER BY ts ASC
)
ORDER BY ts ASC

┌──────────────────ts─┬─uniq─┐
│ 2021-04-29 00:00:00 │    2 │
│ 2021-04-30 00:00:00 │    3 │
│ 2021-05-01 00:00:00 │    4 │
│ 2021-05-02 00:00:00 │    5 │
│ 2021-05-03 00:00:00 │    7 │
└─────────────────────┴──────┘

11 - Data types on disk and in RAM

Data types on disk and in RAM
String

  RAM size (=byteSize): string byte length + 9
    (string length: 64 bit integer, zero-byte terminator: 1 byte)

  Disk size: string length prefix (varint) + the string itself:

    string shorter than 128 - string byte length + 1
    string shorter than 16384 - string byte length + 2
    string shorter than 2097152 - string byte length + 3
    string shorter than 268435456 - string byte length + 4

AggregateFunction(count, ...)

  Disk size: varint
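To check the RAM size of concrete values there is the byteSize function (available in recent ClickHouse versions); a small illustration:

SELECT
    byteSize('hello') AS string_ram_size,     -- 5 bytes of data + 9 bytes of overhead = 14
    byteSize(toUInt64(1)) AS uint64_ram_size  -- 8 bytes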

See also https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf (slide 17-22)

12 - DELETE via tombstone column

DELETE via tombstone column
CREATE TABLE test_delete
(
    `key` UInt32,
    `ts` UInt32,
    `value_a` String,
    `value_b` String,
    `value_c` String,
    `is_active` UInt8 DEFAULT 1
)
ENGINE = MergeTree
ORDER BY key;

INSERT INTO test_delete (key, ts, value_a, value_b, value_c) SELECT
    number,
    1,
    concat('some_looong_string', toString(number)),
    concat('another_long_str', toString(number)),
    concat('string', toString(number))
FROM numbers(10000000);

INSERT INTO test_delete (key, ts, value_a, value_b, value_c) VALUES (400000, 2, 'totally different string', 'another totally different string', 'last string');

SELECT *
FROM test_delete
WHERE key = 400000;

┌────key─┬─ts─┬─value_a──────────────────┬─value_b──────────────────────────┬─value_c─────┬─is_active─┐
│ 400000 │  2 │ totally different string │ another totally different string │ last string │         1 │
└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘
┌────key─┬─ts─┬─value_a──────────────────┬─value_b────────────────┬─value_c──────┬─is_active─┐
│ 400000 │  1 │ some_looong_string400000 │ another_long_str400000 │ string400000 │         1 │
└────────┴────┴──────────────────────────┴────────────────────────┴──────────────┴───────────┘

SET mutations_sync = 2;

ALTER TABLE test_delete
    UPDATE is_active = 0 WHERE (key = 400000) AND (ts = 1);

Ok.

0 rows in set. Elapsed: 0.058 sec.

SELECT *
FROM test_delete
WHERE (key = 400000) AND is_active;

┌────key─┬─ts─┬─value_a──────────────────┬─value_b──────────────────────────┬─value_c─────┬─is_active─┐
│ 400000 │  2 │ totally different string │ another totally different string │ last string │         1 │
└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘

ALTER TABLE test_delete
    DELETE WHERE (key = 400000) AND (ts = 1);

Ok.

0 rows in set. Elapsed: 1.101 sec. -- 20 times slower!!!

SELECT *
FROM test_delete
WHERE key = 400000;

┌────key─┬─ts─┬─value_a──────────────────┬─value_b──────────────────────────┬─value_c─────┬─is_active─┐
│ 400000 │  2 │ totally different string │ another totally different string │ last string │         1 │
└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘

-- For ReplacingMergeTree

OPTIMIZE TABLE test_delete FINAL;

Ok.

0 rows in set. Elapsed: 2.230 sec. -- 40 times slower!!!

SELECT *
FROM test_delete
WHERE key = 400000

┌────key─┬─ts─┬─value_a──────────────────┬─value_b──────────────────────────┬─value_c─────┬─is_active─┐
│ 400000 │  2 │ totally different string │ another totally different string │ last string │         1 │
└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘

13 - EXPLAIN query

EXPLAIN query

EXPLAIN types

EXPLAIN AST
        SYNTAX
        PLAN header = 0,
             description = 1,
             actions = 0,
             optimize = 1
        PIPELINE header = 0,
                 graph = 0,
                 compact = 1
        ESTIMATE
SELECT ...
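A couple of usage sketches (the queries themselves are just placeholders):

EXPLAIN SYNTAX
SELECT number FROM numbers(10) WHERE number > 5;

EXPLAIN PIPELINE graph = 1
SELECT count() FROM numbers_mt(1000000) GROUP BY number % 10;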

References

14 - Fill missing values at query time

Fill missing values at query time
CREATE TABLE event_table
(
    `key` UInt32,
    `created_at` DateTime,
    `value_a` UInt32,
    `value_b` String
)
ENGINE = MergeTree
ORDER BY (key, created_at)

INSERT INTO event_table SELECT
    1 AS key,
    toDateTime('2020-10-11 10:10:10') + number AS created_at,
    if((number = 0) OR ((number % 5) = 1), number + 1, 0) AS value_a,
    if((number = 0) OR ((number % 3) = 1), toString(number), '') AS value_b
FROM numbers(10)

SELECT
    main.key,
    main.created_at,
    a.value_a,
    b.value_b
FROM event_table AS main
ASOF INNER JOIN
(
    SELECT
        key,
        created_at,
        value_a
    FROM event_table
    WHERE value_a != 0
) AS a ON (main.key = a.key) AND (main.created_at >= a.created_at)
ASOF INNER JOIN
(
    SELECT
        key,
        created_at,
        value_b
    FROM event_table
    WHERE value_b != ''
) AS b ON (main.key = b.key) AND (main.created_at >= b.created_at)

┌─main.key─┬─────main.created_at─┬─a.value_a─┬─b.value_b─┐
│        1 │ 2020-10-11 10:10:10 │         1 │ 0         │
│        1 │ 2020-10-11 10:10:11 │         2 │ 1         │
│        1 │ 2020-10-11 10:10:12 │         2 │ 1         │
│        1 │ 2020-10-11 10:10:13 │         2 │ 1         │
│        1 │ 2020-10-11 10:10:14 │         2 │ 4         │
│        1 │ 2020-10-11 10:10:15 │         2 │ 4         │
│        1 │ 2020-10-11 10:10:16 │         7 │ 4         │
│        1 │ 2020-10-11 10:10:17 │         7 │ 7         │
│        1 │ 2020-10-11 10:10:18 │         7 │ 7         │
│        1 │ 2020-10-11 10:10:19 │         7 │ 7         │
└──────────┴─────────────────────┴───────────┴───────────┘

SELECT
    key,
    created_at,
    value_a,
    value_b
FROM
(
    SELECT
        key,
        groupArray(created_at) AS created_arr,
        arrayFill(x -> (x != 0), groupArray(value_a)) AS a_arr,
        arrayFill(x -> (x != ''), groupArray(value_b)) AS b_arr
    FROM
    (
        SELECT *
        FROM event_table
        ORDER BY
            key ASC,
            created_at ASC
    )
    GROUP BY key
)
ARRAY JOIN
    created_arr AS created_at,
    a_arr AS value_a,
    b_arr AS value_b

┌─key─┬──────────created_at─┬─value_a─┬─value_b─┐
│   1 │ 2020-10-11 10:10:10 │       1 │ 0       │
│   1 │ 2020-10-11 10:10:11 │       2 │ 1       │
│   1 │ 2020-10-11 10:10:12 │       2 │ 1       │
│   1 │ 2020-10-11 10:10:13 │       2 │ 1       │
│   1 │ 2020-10-11 10:10:14 │       2 │ 4       │
│   1 │ 2020-10-11 10:10:15 │       2 │ 4       │
│   1 │ 2020-10-11 10:10:16 │       7 │ 4       │
│   1 │ 2020-10-11 10:10:17 │       7 │ 7       │
│   1 │ 2020-10-11 10:10:18 │       7 │ 7       │
│   1 │ 2020-10-11 10:10:19 │       7 │ 7       │
└─────┴─────────────────────┴─────────┴─────────┘

15 - FINAL clause speed

FINAL clause speed

SELECT * FROM table FINAL

See https://github.com/ClickHouse/ClickHouse/pull/15938 and https://github.com/ClickHouse/ClickHouse/issues/11722

So it can work in the following way:

  1. Daily partitioning
  2. After day end + some time interval during which you can get some updates - for example at 3am / 6am you do OPTIMIZE TABLE xxx PARTITION 'prev_day' FINAL
  3. In that case using that FINAL with do_not_merge_across_partitions_select_final will be cheap.
DROP TABLE IF EXISTS repl_tbl;

CREATE TABLE repl_tbl
(
    `key` UInt32,
    `val_1` UInt32,
    `val_2` String,
    `val_3` String,
    `val_4` String,
    `val_5` UUID,
    `ts` DateTime
)
ENGINE = ReplacingMergeTree(ts)
PARTITION BY toDate(ts)
ORDER BY key;


INSERT INTO repl_tbl SELECT number as key, rand() as val_1, randomStringUTF8(10) as val_2, randomStringUTF8(5) as val_3, randomStringUTF8(4) as val_4, generateUUIDv4() as val_5, '2020-01-01 00:00:00' as ts FROM numbers(10000000);
OPTIMIZE TABLE repl_tbl PARTITION ID '20200101' FINAL;
INSERT INTO repl_tbl SELECT number as key, rand() as val_1, randomStringUTF8(10) as val_2, randomStringUTF8(5) as val_3, randomStringUTF8(4) as val_4, generateUUIDv4() as val_5, '2020-01-02 00:00:00' as ts FROM numbers(10000000);
OPTIMIZE TABLE repl_tbl PARTITION ID '20200102' FINAL;
INSERT INTO repl_tbl SELECT number as key, rand() as val_1, randomStringUTF8(10) as val_2, randomStringUTF8(5) as val_3, randomStringUTF8(4) as val_4, generateUUIDv4() as val_5, '2020-01-03 00:00:00' as ts FROM numbers(10000000);
OPTIMIZE TABLE repl_tbl PARTITION ID '20200103' FINAL;
INSERT INTO repl_tbl SELECT number as key, rand() as val_1, randomStringUTF8(10) as val_2, randomStringUTF8(5) as val_3, randomStringUTF8(4) as val_4, generateUUIDv4() as val_5, '2020-01-04 00:00:00' as ts FROM numbers(10000000);
OPTIMIZE TABLE repl_tbl PARTITION ID '20200104' FINAL;

SYSTEM STOP MERGES repl_tbl;
INSERT INTO repl_tbl SELECT number as key, rand() as val_1, randomStringUTF8(10) as val_2, randomStringUTF8(5) as val_3, randomStringUTF8(4) as val_4, generateUUIDv4() as val_5, '2020-01-05 00:00:00' as ts FROM numbers(10000000);


SELECT count() FROM repl_tbl WHERE NOT ignore(*)

┌──count()─┐
│ 50000000 │
└──────────┘

1 rows in set. Elapsed: 1.504 sec. Processed 50.00 million rows, 6.40 GB (33.24 million rows/s., 4.26 GB/s.)

SELECT count() FROM repl_tbl FINAL WHERE NOT ignore(*)

┌──count()─┐
│ 10000000 │
└──────────┘

1 rows in set. Elapsed: 3.314 sec. Processed 50.00 million rows, 6.40 GB (15.09 million rows/s., 1.93 GB/s.)

/* more than 2 times slower, and will get worse once you have more data */

set do_not_merge_across_partitions_select_final=1;

SELECT count() FROM repl_tbl FINAL WHERE NOT ignore(*)

┌──count()─┐
│ 50000000 │
└──────────┘

1 rows in set. Elapsed: 1.850 sec. Processed 50.00 million rows, 6.40 GB (27.03 million rows/s., 3.46 GB/s.)

/* only 0.35 sec slower, and while partitions have about the same size that extra cost will be about constant */

16 - Join with Calendar using Arrays

Join with Calendar using Arrays

Sample data

CREATE TABLE test_metrics (counter_id Int64, timestamp DateTime, metric UInt64)
Engine=Log;

INSERT INTO test_metrics SELECT number % 3,
    toDateTime('2021-01-01 00:00:00'), 1
FROM numbers(20);

INSERT INTO test_metrics SELECT number % 3,
    toDateTime('2021-01-03 00:00:00'), 1
FROM numbers(20);

SELECT counter_id, toDate(timestamp) dt, sum(metric)
FROM test_metrics
GROUP BY counter_id, dt
ORDER BY counter_id, dt;

┌─counter_id─┬─────────dt─┬─sum(metric)─┐
│          0 │ 2021-01-01 │           7 │
│          0 │ 2021-01-03 │           7 │
│          1 │ 2021-01-01 │           7 │
│          1 │ 2021-01-03 │           7 │
│          2 │ 2021-01-01 │           6 │
│          2 │ 2021-01-03 │           6 │
└────────────┴────────────┴─────────────┘

Calendar

WITH arrayMap(i -> (toDate('2021-01-01') + i), range(4)) AS Calendar
SELECT arrayJoin(Calendar);

┌─arrayJoin(Calendar)─┐
│          2021-01-01 │
│          2021-01-02 │
│          2021-01-03 │
│          2021-01-04 │
└─────────────────────┘

Join with Calendar using arrayJoin

SELECT counter_id, tuple.2 dt, sum(tuple.1) sum FROM
  (
  WITH arrayMap(i -> (0, toDate('2021-01-01') + i), range(4)) AS Calendar
   SELECT counter_id, arrayJoin(arrayConcat(Calendar, [(sum, dt)])) tuple
   FROM
             (SELECT counter_id, toDate(timestamp) dt, sum(metric) sum
              FROM test_metrics
              GROUP BY counter_id, dt)
  ) GROUP BY counter_id, dt
    ORDER BY counter_id, dt;

┌─counter_id─┬─────────dt─┬─sum─┐
│          0 │ 2021-01-01 │   7 │
│          0 │ 2021-01-02 │   0 │
│          0 │ 2021-01-03 │   7 │
│          0 │ 2021-01-04 │   0 │
│          1 │ 2021-01-01 │   7 │
│          1 │ 2021-01-02 │   0 │
│          1 │ 2021-01-03 │   7 │
│          1 │ 2021-01-04 │   0 │
│          2 │ 2021-01-01 │   6 │
│          2 │ 2021-01-02 │   0 │
│          2 │ 2021-01-03 │   6 │
│          2 │ 2021-01-04 │   0 │
└────────────┴────────────┴─────┘

With fill

SELECT
    counter_id,
    toDate(timestamp) AS dt,
    sum(metric) AS sum
FROM test_metrics
GROUP BY
    counter_id,
    dt
ORDER BY
    counter_id ASC WITH FILL,
    dt ASC WITH FILL FROM toDate('2021-01-01') TO toDate('2021-01-05');

┌─counter_id─┬─────────dt─┬─sum─┐
│          0 │ 2021-01-01 │   7 │
│          0 │ 2021-01-02 │   0 │
│          0 │ 2021-01-03 │   7 │
│          0 │ 2021-01-04 │   0 │
│          1 │ 2021-01-01 │   7 │
│          1 │ 2021-01-02 │   0 │
│          1 │ 2021-01-03 │   7 │
│          1 │ 2021-01-04 │   0 │
│          2 │ 2021-01-01 │   6 │
│          2 │ 2021-01-02 │   0 │
│          2 │ 2021-01-03 │   6 │
│          2 │ 2021-01-04 │   0 │
└────────────┴────────────┴─────┘

17.1 - JOIN table engine

JOIN table engine

The main purpose of the JOIN table engine is to avoid building the right table for joining on each query execution. So it’s usually used when you have a large number of fast queries which share the same right table for joining.

Updates

It’s possible to update rows with setting join_any_take_last_row enabled.

CREATE TABLE id_val_join
(
    `id` UInt32,
    `val` UInt8
)
ENGINE = Join(ANY, LEFT, id)
SETTINGS join_any_take_last_row = 1

Ok.

INSERT INTO id_val_join VALUES (1,21)(1,22)(3,23);

Ok.

SELECT *
FROM
(
    SELECT toUInt32(number) AS id
    FROM numbers(4)
) AS n
ANY LEFT JOIN id_val_join USING (id)

┌─id─┬─val─┐
│  0 │   0 │
│  1 │  22 │
│  2 │   0 │
│  3 │  23 │
└────┴─────┘

INSERT INTO id_val_join VALUES (1,40)(2,24);

Ok.

SELECT *
FROM
(
    SELECT toUInt32(number) AS id
    FROM numbers(4)
) AS n
ANY LEFT JOIN id_val_join USING (id)

┌─id─┬─val─┐
│  0 │   0 │
│  1 │  40 │
│  2 │  24 │
│  3 │  23 │
└────┴─────┘

https://clickhouse.tech/docs/en/engines/table-engines/special/join/

18 - JSONExtract to parse many attributes at a time

JSONExtract to parse many attributes at a time
WITH JSONExtract(json, 'Tuple(name String, id String, resources Nested(description String, format String, tracking_summary Tuple(total UInt32, recent UInt32)), extras Nested(key String, value String))') AS parsed_json
SELECT
    tupleElement(parsed_json, 'name') AS name,
    tupleElement(parsed_json, 'id') AS id,
    tupleElement(tupleElement(parsed_json, 'resources'), 'description') AS `resources.description`,
    tupleElement(tupleElement(parsed_json, 'resources'), 'format') AS `resources.format`,
    tupleElement(tupleElement(tupleElement(parsed_json, 'resources'), 'tracking_summary'), 'total') AS `resources.tracking_summary.total`,
    tupleElement(tupleElement(tupleElement(parsed_json, 'resources'), 'tracking_summary'), 'recent') AS `resources.tracking_summary.recent`
FROM url('https://raw.githubusercontent.com/jsonlines/guide/master/datagov100.json', 'JSONAsString', 'json String')
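
The extras attribute declared in the Tuple above can be unpacked the same way; a sketch against the same file that extracts only that part:

WITH JSONExtract(json, 'Tuple(extras Nested(key String, value String))') AS parsed_json
SELECT
    tupleElement(tupleElement(parsed_json, 'extras'), 'key') AS `extras.key`,
    tupleElement(tupleElement(parsed_json, 'extras'), 'value') AS `extras.value`
FROM url('https://raw.githubusercontent.com/jsonlines/guide/master/datagov100.json', 'JSONAsString', 'json String')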

19 - KILL QUERY

KILL QUERY

Unfortunately not all queries can be killed. KILL QUERY only sets a flag that must be checked by the query itself. The query pipeline checks this flag before switching to the next block. If the pipeline is stuck somewhere in the middle, the query cannot be killed. If a query does not stop, the only way to get rid of it is to restart ClickHouse.
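
A typical workflow is to find the offending query in system.processes first and then set the kill flag for it (illustrative; substitute a real query_id):

-- find long-running queries
SELECT query_id, elapsed, query
FROM system.processes
ORDER BY elapsed DESC;

-- ASYNC returns immediately, SYNC waits until the query actually stops
KILL QUERY WHERE query_id = '<query_id>' ASYNC;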

See also

https://github.com/ClickHouse/ClickHouse/issues/3964
https://github.com/ClickHouse/ClickHouse/issues/1576

How to replace a running query

Q. We are trying to abort running queries when they are being replaced with a new one. We are setting the same query id for this. In some cases this error happens:

Query with id = e213cc8c-3077-4a6c-bc78-e8463adad35d is already running and can’t be stopped

The query is still being killed but the new one is not being executed. Do you know anything about this and if there is a fix or workaround for it?

I guess you use replace_running_query + replace_running_query_max_wait_ms.

Unfortunately it’s not always possible to kill the query at an arbitrary moment in time.

KILL doesn’t send any signals; it just sets a flag, which gets (synchronously) checked at certain moments of query execution, mostly after finishing processing one block and starting another.

At certain stages (e.g. while executing a scalar subquery) the query cannot be killed at all. This is a known issue and requires an architectural change to fix.

I see. Is there a workaround?

This is our use case:

A user requests an analytics report backed by a query that takes several seconds. The user then changes the report (e.g. its filters, metrics, or dimensions…). Since the user changed what they are looking for, the results of the initial query are never used, and we would like to cancel it when starting the new query.

You can just use 2 commands:

KILL QUERY WHERE query_id = ' ... ' ASYNC

SELECT ... new query ....

In that case you don’t need to care about when the original query actually stops.

20 - Lag / Lead

Lag / Lead

Sample data

CREATE TABLE llexample (
    g Int32,
    a Date )
ENGINE = Memory;

INSERT INTO llexample SELECT
    number % 3,
    toDate('2020-01-01') + number
FROM numbers(10);

SELECT * FROM llexample ORDER BY g,a;

┌─g─┬──────────a─┐
 0  2020-01-01 
 0  2020-01-04 
 0  2020-01-07 
 0  2020-01-10 
 1  2020-01-02 
 1  2020-01-05 
 1  2020-01-08 
 2  2020-01-03 
 2  2020-01-06 
 2  2020-01-09 
└───┴────────────┘

Using arrays

select g, (arrayJoin(tuple_ll) as ll).1 a, ll.2 prev, ll.3 next
from (
select g, arrayZip( arraySort(groupArray(a)) as aa,
                    arrayPopBack(arrayPushFront(aa, toDate(0))),
                    arrayPopFront(arrayPushBack(aa, toDate(0))) ) tuple_ll
from llexample
group by g)
order by g, a;

┌─g─┬──────────a─┬───────prev─┬───────next─┐
 0  2020-01-01  1970-01-01  2020-01-04 
 0  2020-01-04  2020-01-01  2020-01-07 
 0  2020-01-07  2020-01-04  2020-01-10 
 0  2020-01-10  2020-01-07  1970-01-01 
 1  2020-01-02  1970-01-01  2020-01-05 
 1  2020-01-05  2020-01-02  2020-01-08 
 1  2020-01-08  2020-01-05  1970-01-01 
 2  2020-01-03  1970-01-01  2020-01-06 
 2  2020-01-06  2020-01-03  2020-01-09 
 2  2020-01-09  2020-01-06  1970-01-01 
└───┴────────────┴────────────┴────────────┘

Using window functions (starting from ClickHouse 21.3)

SET allow_experimental_window_functions = 1;

SELECT
    g,
    a,
    any(a) OVER (PARTITION BY g ORDER BY a ASC ROWS
                 BETWEEN 1 PRECEDING AND 1 PRECEDING) AS prev,
    any(a) OVER (PARTITION BY g ORDER BY a ASC ROWS
                 BETWEEN 1 FOLLOWING AND 1 FOLLOWING) AS next
FROM llexample
ORDER BY
    g ASC,
    a ASC;

┌─g─┬──────────a─┬───────prev─┬───────next─┐
 0  2020-01-01  1970-01-01  2020-01-04 
 0  2020-01-04  2020-01-01  2020-01-07 
 0  2020-01-07  2020-01-04  2020-01-10 
 0  2020-01-10  2020-01-07  1970-01-01 
 1  2020-01-02  1970-01-01  2020-01-05 
 1  2020-01-05  2020-01-02  2020-01-08 
 1  2020-01-08  2020-01-05  1970-01-01 
 2  2020-01-03  1970-01-01  2020-01-06 
 2  2020-01-06  2020-01-03  2020-01-09 
 2  2020-01-09  2020-01-06  1970-01-01 
└───┴────────────┴────────────┴────────────┘

Using lagInFrame/leadInFrame (starting from ClickHouse 21.4)

SELECT
    g,
    a,
    lagInFrame(a) OVER (PARTITION BY g ORDER BY a ASC ROWS
                 BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS prev,
    leadInFrame(a) OVER (PARTITION BY g ORDER BY a ASC ROWS
                 BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next
FROM llexample
ORDER BY
    g ASC,
    a ASC;

┌─g─┬──────────a─┬───────prev─┬───────next─┐
 0  2020-01-01  1970-01-01  2020-01-04 
 0  2020-01-04  2020-01-01  2020-01-07 
 0  2020-01-07  2020-01-04  2020-01-10 
 0  2020-01-10  2020-01-07  1970-01-01 
 1  2020-01-02  1970-01-01  2020-01-05 
 1  2020-01-05  2020-01-02  2020-01-08 
 1  2020-01-08  2020-01-05  1970-01-01 
 2  2020-01-03  1970-01-01  2020-01-06 
 2  2020-01-06  2020-01-03  2020-01-09 
 2  2020-01-09  2020-01-06  1970-01-01 
└───┴────────────┴────────────┴────────────┘
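
lagInFrame / leadInFrame also take an optional offset and a default value for rows that fall outside the partition; a sketch on the same table (assuming the optional arguments are available in your version):

SELECT
    g,
    a,
    lagInFrame(a, 2, toDate(0)) OVER (PARTITION BY g ORDER BY a ASC ROWS
                 BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS prev2
FROM llexample
ORDER BY
    g ASC,
    a ASC;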

Using neighbor (does not respect grouping; result is incorrect across block boundaries)

SELECT
    g,
    a,
    neighbor(a, -1) AS prev,
    neighbor(a, 1) AS next
FROM
(
    SELECT *
    FROM llexample
    ORDER BY
        g ASC,
        a ASC
);

┌─g─┬──────────a─┬───────prev─┬───────next─┐
 0  2020-01-01  1970-01-01  2020-01-04 
 0  2020-01-04  2020-01-01  2020-01-07 
 0  2020-01-07  2020-01-04  2020-01-10 
 0  2020-01-10  2020-01-07  2020-01-02 
 1  2020-01-02  2020-01-10  2020-01-05 
 1  2020-01-05  2020-01-02  2020-01-08 
 1  2020-01-08  2020-01-05  2020-01-03 
 2  2020-01-03  2020-01-08  2020-01-06 
 2  2020-01-06  2020-01-03  2020-01-09 
 2  2020-01-09  2020-01-06  1970-01-01 
└───┴────────────┴────────────┴────────────┘

22 - Mutations

ALTER UPDATE / DELETE

Q. How to know if ALTER TABLE … DELETE/UPDATE mutation ON CLUSTER was finished successfully on all the nodes?

A. The mutation status in system.mutations is local to each replica, so use:

SELECT hostname(), * FROM clusterAllReplicas('your_cluster_name', system.mutations);
-- you can also add WHERE conditions to that query if needed.

Look at the is_done and latest_fail_reason columns.
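
For example, to list only mutations that are not finished yet together with the last failure reason (a sketch using the same clusterAllReplicas approach):

SELECT hostname(), database, table, mutation_id, command, is_done, latest_fail_reason
FROM clusterAllReplicas('your_cluster_name', system.mutations)
WHERE is_done = 0;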

23 - OPTIMIZE vs OPTIMIZE FINAL

OPTIMIZE vs OPTIMIZE FINAL

OPTIMIZE TABLE xyz – this initiates an unscheduled merge.

Example

You have 40 parts in 3 partitions. This unscheduled merge selects some partition (e.g. February), picks 3 small parts in it, and merges them into a single part. You end up with 38 parts.

OPTIMIZE TABLE xyz FINAL – initiates a cycle of unscheduled merges.

ClickHouse merges parts in this table until only one part remains in each partition (provided the system has enough free disk space). As a result, you get 3 parts, one part per partition. In this case ClickHouse rewrites parts even if a partition already consists of a single part. This creates a huge CPU / disk load if the table (xyz) is big: ClickHouse reads, uncompresses, merges, compresses, and writes all data in the table again.

If the table is around 1 TB in size, this could take around 3 hours to complete.

So we don’t recommend running OPTIMIZE TABLE xyz FINAL against tables with more than 10 million rows.
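
Before running OPTIMIZE TABLE ... FINAL it is worth checking how many active parts each partition really has; a sketch against system.parts (assumes the table lives in the current database):

SELECT partition, count() AS active_parts, formatReadableSize(sum(bytes_on_disk)) AS size
FROM system.parts
WHERE active AND (database = currentDatabase()) AND (table = 'xyz')
GROUP BY partition
ORDER BY partition;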

24 - Parameterized views

Parameterized views

Custom settings allow emulating parameterized views.

You need to enable custom settings and define the prefixes allowed for them.

$ cat /etc/clickhouse-server/config.d/custom_settings_prefixes.xml
<?xml version="1.0" ?>
<yandex>
    <custom_settings_prefixes>my,my2</custom_settings_prefixes>
</yandex>

You can also set default values for these settings in the default profile of the user configuration.

cat /etc/clickhouse-server/users.d/custom_settings_default.xml
<?xml version="1.0"?>
<yandex>
    <profiles>
        <default>
            <my2_category>'hot deals'</my2_category>
        </default>
    </profiles>
</yandex>

See also: https://kb.altinity.com/altinity-kb-setup-and-maintenance/custom_settings/

A server restart is required for the default value to be applied

$ systemctl restart clickhouse-server

Now you can set these settings like any other setting and query them using the getSetting() function.

SET my2_category='hot deals';

SELECT getSetting('my2_category');
┌─getSetting('my2_category')─┐
 hot deals                  
└────────────────────────────┘

-- you can query ClickHouse settings as well
SELECT getSetting('max_threads')
┌─getSetting('max_threads')─┐
                         8 
└───────────────────────────┘

Now we can create a view

CREATE VIEW my_new_view AS
SELECT *
FROM deals
WHERE category_id IN
(
    SELECT category_id
    FROM deal_categories
    WHERE category = getSetting('my2_category')
);

And query it

SELECT *
FROM my_new_view
SETTINGS my2_category = 'hot deals';
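
Because a default value for my2_category was set in users.d above, the view can also be queried without the SETTINGS clause; it will then fall back to 'hot deals':

SELECT *
FROM my_new_view;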

If the custom setting is not set when the view is being created, you need to explicitly define the list of columns for the view:

CREATE VIEW my_new_view (c1 Int, c2 String, ...)
AS
SELECT *
FROM deals
WHERE category_id IN
(
    SELECT category_id
    FROM deal_categories
    WHERE category = getSetting('my2_category')
);

25 - Use both projection and raw data in single query

How to write queries that use data from both the projection and the raw table.

CREATE TABLE default.metric
(
    `key_a` UInt8,
    `key_b` UInt32,
    `date` Date,
    `value` UInt32,
    PROJECTION monthly
    (
        SELECT
            key_a,
            key_b,
            min(date),
            sum(value)
        GROUP BY
            key_a,
            key_b
    )
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(date)
ORDER BY (key_a, key_b, date)
SETTINGS index_granularity = 8192;


INSERT INTO metric SELECT
    key_a,
    key_b,
    date,
    rand() % 100000 AS value
FROM
(
    SELECT
        arrayJoin(range(8)) AS key_a,
        number % 500000 AS key_b,
        today() - intDiv(number, 500000) AS date
    FROM numbers_mt(1080000000)
);

OPTIMIZE TABLE metric FINAL;

SET max_threads = 8;

WITH
    toDate('2015-02-27') AS start_date,
    toDate('2022-02-15') AS end_date,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sum(value) AS sum
FROM metric
WHERE (date > start_date) AND (date < end_date) AND key_a_cond
GROUP BY key_b
ORDER BY sum DESC
LIMIT 25

25 rows in set. Elapsed: 6.561 sec. Processed 4.32 billion rows, 47.54 GB (658.70 million rows/s., 7.25 GB/s.)

WITH
    toDate('2015-02-27') AS start_date,
    toDate('2022-02-15') AS end_date,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sum(value) AS sum
FROM
(
    SELECT
        key_b,
        value
    FROM metric
    WHERE indexHint(_partition_id IN CAST([toYYYYMM(start_date), toYYYYMM(end_date)], 'Array(String)')) AND (date > start_date) AND (date < end_date) AND key_a_cond
    UNION ALL
    SELECT
        key_b,
        sum(value) AS value
    FROM metric
    WHERE indexHint(_partition_id IN CAST(range(toYYYYMM(start_date) + 1, toYYYYMM(end_date)), 'Array(String)')) AND key_a_cond
    GROUP BY key_b
)
GROUP BY key_b
ORDER BY sum DESC
LIMIT 25

25 rows in set. Elapsed: 1.038 sec. Processed 181.86 million rows, 4.56 GB (175.18 million rows/s., 4.40 GB/s.)
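
The indexHint branches above rely on _partition_id matching the toYYYYMM strings produced by the PARTITION BY expression; an illustrative way to check which partitions exist (and therefore which branch will read them):

SELECT partition_id, partition, sum(rows) AS total_rows
FROM system.parts
WHERE active AND (database = currentDatabase()) AND (table = 'metric')
GROUP BY partition_id, partition
ORDER BY partition_id;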


WITH
    (toDate('2016-02-27'), toDate('2017-02-15')) AS period_1,
    (toDate('2018-05-27'), toDate('2022-08-15')) AS period_2,
    (date > (period_1.1)) AND (date < (period_1.2)) AS period_1_cond,
    (date > (period_2.1)) AND (date < (period_2.2)) AS period_2_cond,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sumIf(value, period_1_cond) AS sum_per_1,
    sumIf(value, period_2_cond) AS sum_per_2
FROM metric
WHERE (period_1_cond OR period_2_cond) AND key_a_cond
GROUP BY key_b
ORDER BY sum_per_2 / sum_per_1 DESC
LIMIT 25

25 rows in set. Elapsed: 5.717 sec. Processed 3.47 billion rows, 38.17 GB (606.93 million rows/s., 6.68 GB/s.)

WITH
    (toDate('2016-02-27'), toDate('2017-02-15')) AS period_1,
    (toDate('2018-05-27'), toDate('2022-08-15')) AS period_2,
    (date > (period_1.1)) AND (date < (period_1.2)) AS period_1_cond,
    (date > (period_2.1)) AND (date < (period_2.2)) AS period_2_cond,
    CAST([toYYYYMM(period_1.1), toYYYYMM(period_1.2), toYYYYMM(period_2.1), toYYYYMM(period_2.2)], 'Array(String)') AS daily_parts,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sumIf(value, period_1_cond) AS sum_per_1,
    sumIf(value, period_2_cond) AS sum_per_2
FROM
(
    SELECT
        key_b,
        date,
        value
    FROM metric
    WHERE indexHint(_partition_id IN (daily_parts)) AND (period_1_cond OR period_2_cond) AND key_a_cond
    UNION ALL
    SELECT
        key_b,
        min(date) AS date,
        sum(value) AS value
    FROM metric
    WHERE indexHint(_partition_id IN CAST(arrayConcat(range(toYYYYMM(period_1.1) + 1, toYYYYMM(period_1.2)), range(toYYYYMM(period_2.1) + 1, toYYYYMM(period_2.2))), 'Array(String)')) AND indexHint(_partition_id NOT IN (daily_parts)) AND key_a_cond
    GROUP BY
        key_b
)
GROUP BY key_b
ORDER BY sum_per_2 / sum_per_1 DESC
LIMIT 25


25 rows in set. Elapsed: 0.444 sec. Processed 140.34 million rows, 2.11 GB (316.23 million rows/s., 4.77 GB/s.)


WITH
    toDate('2022-01-03') AS start_date,
    toDate('2022-02-15') AS end_date,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sum(value) AS sum
FROM metric
WHERE (date > start_date) AND (date < end_date) AND key_a_cond
GROUP BY key_b
ORDER BY sum DESC
LIMIT 25

25 rows in set. Elapsed: 0.208 sec. Processed 100.06 million rows, 1.10 GB (481.06 million rows/s., 5.29 GB/s.)


WITH
    toDate('2022-01-03') AS start_date,
    toDate('2022-02-15') AS end_date,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sum(value) AS sum
FROM
(
    SELECT
        key_b,
        value
    FROM metric
    WHERE indexHint(_partition_id IN CAST([toYYYYMM(start_date), toYYYYMM(end_date)], 'Array(String)')) AND (date > start_date) AND (date < end_date) AND key_a_cond
    UNION ALL
    SELECT
        key_b,
        sum(value) AS value
    FROM metric
    WHERE indexHint(_partition_id IN CAST(range(toYYYYMM(start_date) + 1, toYYYYMM(end_date)), 'Array(String)')) AND key_a_cond
    GROUP BY key_b
)
GROUP BY key_b
ORDER BY sum DESC
LIMIT 25

25 rows in set. Elapsed: 0.216 sec. Processed 100.06 million rows, 1.10 GB (462.68 million rows/s., 5.09 GB/s.)


WITH
    toDate('2021-12-03') AS start_date,
    toDate('2022-02-15') AS end_date,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sum(value) AS sum
FROM metric
WHERE (date > start_date) AND (date < end_date) AND key_a_cond
GROUP BY key_b
ORDER BY sum DESC
LIMIT 25

25 rows in set. Elapsed: 0.308 sec. Processed 162.09 million rows, 1.78 GB (526.89 million rows/s., 5.80 GB/s.)

WITH
    toDate('2021-12-03') AS start_date,
    toDate('2022-02-15') AS end_date,
    key_a IN (1, 3, 5, 7) AS key_a_cond
SELECT
    key_b,
    sum(value) AS sum
FROM
(
    SELECT
        key_b,
        value
    FROM metric
    WHERE indexHint(_partition_id IN CAST([toYYYYMM(start_date), toYYYYMM(end_date)], 'Array(String)')) AND (date > start_date) AND (date < end_date) AND key_a_cond
    UNION ALL
    SELECT
        key_b,
        sum(value) AS value
    FROM metric
    WHERE indexHint(_partition_id IN CAST(range(toYYYYMM(start_date) + 1, toYYYYMM(end_date)), 'Array(String)')) AND key_a_cond
    GROUP BY key_b
)
GROUP BY key_b
ORDER BY sum DESC
LIMIT 25

25 rows in set. Elapsed: 0.268 sec. Processed 102.08 million rows, 1.16 GB (381.46 million rows/s., 4.33 GB/s.)

26 - PIVOT / UNPIVOT

PIVOT / UNPIVOT

PIVOT

CREATE TABLE sales(suppkey UInt8, category String, quantity UInt32) ENGINE=Memory(); 

INSERT INTO sales VALUES (2, 'AA' ,7500),(1, 'AB' , 4000),(1, 'AA' , 6900),(1, 'AB', 8900), (1, 'AC', 8300), (1, 'AA', 7000), (1, 'AC', 9000), (2,'AA', 9800), (2,'AB', 9600), (1,'AC', 8900),(1, 'AD', 400), (2,'AD', 900), (2,'AD', 1200), (1,'AD', 2600), (2, 'AC', 9600),(1, 'AC', 6200);

Using Map data type (starting from ClickHouse 21.1)

WITH CAST(sumMap([category], [quantity]), 'Map(String, UInt32)') AS map
SELECT
    suppkey,
    map['AA'] AS AA,
    map['AB'] AS AB,
    map['AC'] AS AC,
    map['AD'] AS AD
FROM sales
GROUP BY suppkey
ORDER BY suppkey ASC

┌─suppkey─┬────AA─┬────AB─┬────AC─┬───AD─┐
       1  13900  12900  32400  3000 
       2  17300   9600   9600  2100 
└─────────┴───────┴───────┴───────┴──────┘

WITH CAST(sumMap(map(category, quantity)), 'Map(LowCardinality(String), UInt32)') AS map
SELECT
    suppkey,
    map['AA'] AS AA,
    map['AB'] AS AB,
    map['AC'] AS AC,
    map['AD'] AS AD
FROM sales
GROUP BY suppkey
ORDER BY suppkey ASC

┌─suppkey─┬────AA─┬────AB─┬────AC─┬───AD─┐
       1  13900  12900  32400  3000 
       2  17300   9600   9600  2100 
└─────────┴───────┴───────┴───────┴──────┘

Using -If combinator

SELECT
    suppkey,
    sumIf(quantity, category = 'AA') AS AA,
    sumIf(quantity, category = 'AB') AS AB,
    sumIf(quantity, category = 'AC') AS AC,
    sumIf(quantity, category = 'AD') AS AD
FROM sales
GROUP BY suppkey
ORDER BY suppkey ASC

┌─suppkey─┬────AA─┬────AB─┬────AC─┬───AD─┐
       1  13900  12900  32400  3000 
       2  17300   9600   9600  2100 
└─────────┴───────┴───────┴───────┴──────┘

Using -Resample combinator

WITH sumResample(0, 4, 1)(quantity, transform(category, ['AA', 'AB', 'AC', 'AD'], [0, 1, 2, 3], 4)) AS sum
SELECT
    suppkey,
    sum[1] AS AA,
    sum[2] AS AB,
    sum[3] AS AC,
    sum[4] AS AD
FROM sales
GROUP BY suppkey
ORDER BY suppkey ASC

┌─suppkey─┬────AA─┬────AB─┬────AC─┬───AD─┐
       1  13900  12900  32400  3000 
       2  17300   9600   9600  2100 
└─────────┴───────┴───────┴───────┴──────┘

UNPIVOT

CREATE TABLE sales_w(suppkey UInt8, brand String, AA UInt32, AB UInt32, AC UInt32,
AD UInt32) ENGINE=Memory();

INSERT INTO sales_w VALUES (1, 'BRAND_A', 1500, 4200, 1600, 9800), (2, 'BRAND_B', 6200, 1300, 5800, 3100), (3, 'BRAND_C', 5000, 8900, 6900, 3400);

SELECT
    suppkey,
    brand,
    category,
    quantity
FROM sales_w
ARRAY JOIN
    [AA, AB, AC, AD] AS quantity,
    splitByString(', ', 'AA, AB, AC, AD') AS category
ORDER BY suppkey ASC

┌─suppkey─┬─brand───┬─category─┬─quantity─┐
       1  BRAND_A  AA            1500 
       1  BRAND_A  AB            4200 
       1  BRAND_A  AC            1600 
       1  BRAND_A  AD            9800 
       2  BRAND_B  AA            6200 
       2  BRAND_B  AB            1300 
       2  BRAND_B  AC            5800 
       2  BRAND_B  AD            3100 
       3  BRAND_C  AA            5000 
       3  BRAND_C  AB            8900 
       3  BRAND_C  AC            6900 
       3  BRAND_C  AD            3400 
└─────────┴─────────┴──────────┴──────────┘

Using tupleToNameValuePairs (starting from ClickHouse 21.9)

SELECT
    suppkey,
    brand,
    tpl.1 AS category,
    tpl.2 AS quantity
FROM sales_w
ARRAY JOIN tupleToNameValuePairs((AA, AB, AC, AD)) AS tpl
ORDER BY suppkey ASC

┌─suppkey─┬─brand───┬─category─┬─quantity─┐
       1  BRAND_A  AA            1500 
       1  BRAND_A  AB            4200 
       1  BRAND_A  AC            1600 
       1  BRAND_A  AD            9800 
       2  BRAND_B  AA            6200 
       2  BRAND_B  AB            1300 
       2  BRAND_B  AC            5800 
       2  BRAND_B  AD            3100 
       3  BRAND_C  AA            5000 
       3  BRAND_C  AB            8900 
       3  BRAND_C  AC            6900 
       3  BRAND_C  AD            3400 
└─────────┴─────────┴──────────┴──────────┘

27 - Possible deadlock avoided. Client should retry

Possible deadlock avoided. Client should retry

In version 19.14 a serious issue was found: a race condition that could lead to a server deadlock. The reason for it was quite fundamental, and a temporary workaround was added (“possible deadlock avoided”).

Those locks are one of the fundamental things that the core team was actively working on in 2020.

In 20.3 some of the locks leading to that situation were removed as part of a huge refactoring.

In 20.4 more locks were removed and the check was made configurable (see the lock_acquire_timeout setting and the sketch at the end of this section), so you can control how long to wait before that exception is returned.

In 20.5 the heuristics of that check (“possible deadlock avoided”) were improved.

In 20.6 all table-level locks that could be removed were removed, so ALTERs are completely lock-free.

20.10 enables database=Atomic by default, which allows running even DROP commands without locks.

Typically the issue happened when running concurrent SELECT queries on system.parts / system.columns / system.tables while simultaneously manipulating the tables (some kind of ALTER / TRUNCATE / DROP).

If that exception happens often in your use case, an upgrade is recommended. In the meantime, check which queries are running (especially against system.tables / system.parts and other system tables) and check whether killing or avoiding them helps to resolve the issue.
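
If you cannot upgrade right away, the lock_acquire_timeout setting mentioned above can be tuned per query or per profile; an illustrative sketch (the value is in seconds):

-- wait at most 10 seconds for table locks before throwing the exception
SELECT count()
FROM system.parts
SETTINGS lock_acquire_timeout = 10;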

28 - Projections examples

Projections examples

Aggregating projections

create table z(Browser String, Country UInt8, F Float64)
Engine=MergeTree
order by Browser;

insert into z
     select toString(number%9999),
     number%33, 1
from numbers(100000000);

--Q1)
select sum(F), Browser
from z
group by Browser format Null;
Elapsed: 0.205 sec. Processed 100.00 million rows

--Q2)
select sum(F), Browser, Country
from z
group by Browser,Country format Null;
Elapsed: 0.381 sec. Processed 100.00 million rows

--Q3)
select sum(F),count(), Browser, Country
from z
group by Browser,Country format Null;
Elapsed: 0.398 sec. Processed 100.00 million rows

alter table z add projection pp
   (select Browser,Country, count(), sum(F)
    group by Browser,Country);
alter table z materialize projection pp;

---- 0 = don't use proj, 1 = use projection
set allow_experimental_projection_optimization=1;

--Q1)
select sum(F), Browser
from z
group by Browser format Null;
Elapsed: 0.003 sec. Processed 22.43 thousand rows

--Q2)
select sum(F), Browser, Country
from z
group by Browser,Country format Null;
Elapsed: 0.004 sec. Processed 22.43 thousand rows

--Q3)
select sum(F),count(), Browser, Country
from z
group by Browser,Country format Null;
Elapsed: 0.005 sec. Processed 22.43 thousand rows
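
To check whether a query was actually served from the projection, you can look at the query log (a sketch; assumes the query log is enabled and the projections column is available in your version):

SELECT query, projections
FROM system.query_log
WHERE (type = 'QueryFinish') AND (query ILIKE '%group by Browser%')
ORDER BY event_time DESC
LIMIT 5;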


29 - Roaring bitmaps for calculating retention

CREATE TABLE test_roaring_bitmap
ENGINE = MergeTree
ORDER BY h AS
SELECT
    intDiv(number, 5) AS h,
    groupArray(toUInt16(number - (2 * intDiv(number, 5)))) AS vals,
    groupBitmapState(toUInt16(number - (2 * intDiv(number, 5)))) AS vals_bitmap
FROM numbers(40)
GROUP BY h

SELECT
    h,
    vals,
    hex(vals_bitmap)
FROM test_roaring_bitmap

┌─h─┬─vals─────────────┬─hex(vals_bitmap