TTL GROUP BY Examples
Example with MergeTree table
CREATE TABLE test_ttl_group_by
(
`key` UInt32,
`ts` DateTime,
`value` UInt32,
`min_value` UInt32 DEFAULT value,
`max_value` UInt32 DEFAULT value
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(ts)
ORDER BY (key, toStartOfDay(ts))
TTL ts + interval 30 day
GROUP BY key, toStartOfDay(ts)
SET value = sum(value),
min_value = min(min_value),
max_value = max(max_value),
ts = min(toStartOfDay(ts));
During TTL merges ClickHouse® re-calculates values of columns in the SET section.
GROUP BY section should be a prefix of a table’s PRIMARY KEY (the same as ORDER BY, if no separate PRIMARY KEY defined).
-- stop merges to demonstrate data before / after
-- a rolling up
SYSTEM STOP TTL MERGES test_ttl_group_by;
SYSTEM STOP MERGES test_ttl_group_by;
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() + number,
1
FROM numbers(100);
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() - interval 60 day + number,
2
FROM numbers(100);
SELECT
toYYYYMM(ts) AS m,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY m;
┌──────m─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 202102 │ 100 │ 200 │ 2 │ 2 │
│ 202104 │ 100 │ 100 │ 1 │ 1 │
└────────┴─────────┴────────────┴────────────────┴────────────────┘
SYSTEM START TTL MERGES test_ttl_group_by;
SYSTEM START MERGES test_ttl_group_by;
OPTIMIZE TABLE test_ttl_group_by FINAL;
SELECT
toYYYYMM(ts) AS m,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY m;
┌──────m─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 202102 │ 5 │ 200 │ 2 │ 2 │
│ 202104 │ 100 │ 100 │ 1 │ 1 │
└────────┴─────────┴────────────┴────────────────┴────────────────┘
As you can see 100 rows were rolled up into 5 rows (key has 5 values) for rows older than 30 days.
Example with SummingMergeTree table
CREATE TABLE test_ttl_group_by
(
`key1` UInt32,
`key2` UInt32,
`ts` DateTime,
`value` UInt32,
`min_value` SimpleAggregateFunction(min, UInt32)
DEFAULT value,
`max_value` SimpleAggregateFunction(max, UInt32)
DEFAULT value
)
ENGINE = SummingMergeTree
PARTITION BY toYYYYMM(ts)
PRIMARY KEY (key1, key2, toStartOfDay(ts))
ORDER BY (key1, key2, toStartOfDay(ts), ts)
TTL ts + interval 30 day
GROUP BY key1, key2, toStartOfDay(ts)
SET value = sum(value),
min_value = min(min_value),
max_value = max(max_value),
ts = min(toStartOfDay(ts));
-- stop merges to demonstrate data before / after
-- a rolling up
SYSTEM STOP TTL MERGES test_ttl_group_by;
SYSTEM STOP MERGES test_ttl_group_by;
INSERT INTO test_ttl_group_by (key1, key2, ts, value)
SELECT
1,
1,
toStartOfMinute(now() + number*60),
1
FROM numbers(100);
INSERT INTO test_ttl_group_by (key1, key2, ts, value)
SELECT
1,
1,
toStartOfMinute(now() + number*60),
1
FROM numbers(100);
INSERT INTO test_ttl_group_by (key1, key2, ts, value)
SELECT
1,
1,
toStartOfMinute(now() + number*60 - toIntervalDay(60)),
2
FROM numbers(100);
INSERT INTO test_ttl_group_by (key1, key2, ts, value)
SELECT
1,
1,
toStartOfMinute(now() + number*60 - toIntervalDay(60)),
2
FROM numbers(100);
SELECT
toYYYYMM(ts) AS m,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY m;
┌──────m─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 202102 │ 200 │ 400 │ 2 │ 2 │
│ 202104 │ 200 │ 200 │ 1 │ 1 │
└────────┴─────────┴────────────┴────────────────┴────────────────┘
SYSTEM START TTL MERGES test_ttl_group_by;
SYSTEM START MERGES test_ttl_group_by;
OPTIMIZE TABLE test_ttl_group_by FINAL;
SELECT
toYYYYMM(ts) AS m,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY m;
┌──────m─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 202102 │ 1 │ 400 │ 2 │ 2 │
│ 202104 │ 100 │ 200 │ 1 │ 1 │
└────────┴─────────┴────────────┴────────────────┴────────────────┘
During merges ClickHouse re-calculates ts columns as min(toStartOfDay(ts)). It’s possible only for the last column of SummingMergeTree
ORDER BY
section ORDER BY (key1, key2, toStartOfDay(ts), ts)
otherwise it will break the order of rows in the table.
Example with AggregatingMergeTree table
CREATE TABLE test_ttl_group_by_agg
(
`key1` UInt32,
`key2` UInt32,
`ts` DateTime,
`counter` AggregateFunction(count, UInt32)
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(ts)
PRIMARY KEY (key1, key2, toStartOfDay(ts))
ORDER BY (key1, key2, toStartOfDay(ts), ts)
TTL ts + interval 30 day
GROUP BY key1, key2, toStartOfDay(ts)
SET counter = countMergeState(counter),
ts = min(toStartOfDay(ts));
CREATE TABLE test_ttl_group_by_raw
(
`key1` UInt32,
`key2` UInt32,
`ts` DateTime
) ENGINE = Null;
CREATE MATERIALIZED VIEW test_ttl_group_by_mv
TO test_ttl_group_by_agg
AS
SELECT
`key1`,
`key2`,
`ts`,
countState() as counter
FROM test_ttl_group_by_raw
GROUP BY key1, key2, ts;
-- stop merges to demonstrate data before / after
-- a rolling up
SYSTEM STOP TTL MERGES test_ttl_group_by_agg;
SYSTEM STOP MERGES test_ttl_group_by_agg;
INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
SELECT
1,
1,
toStartOfMinute(now() + number*60)
FROM numbers(100);
INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
SELECT
1,
1,
toStartOfMinute(now() + number*60)
FROM numbers(100);
INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
SELECT
1,
1,
toStartOfMinute(now() + number*60 - toIntervalDay(60))
FROM numbers(100);
INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
SELECT
1,
1,
toStartOfMinute(now() + number*60 - toIntervalDay(60))
FROM numbers(100);
SELECT
toYYYYMM(ts) AS m,
count(),
countMerge(counter)
FROM test_ttl_group_by_agg
GROUP BY m;
┌──────m─┬─count()─┬─countMerge(counter)─┐
│ 202307 │ 200 │ 200 │
│ 202309 │ 200 │ 200 │
└────────┴─────────┴─────────────────────┘
SYSTEM START TTL MERGES test_ttl_group_by_agg;
SYSTEM START MERGES test_ttl_group_by_agg;
OPTIMIZE TABLE test_ttl_group_by_agg FINAL;
SELECT
toYYYYMM(ts) AS m,
count(),
countMerge(counter)
FROM test_ttl_group_by_agg
GROUP BY m;
┌──────m─┬─count()─┬─countMerge(counter)─┐
│ 202307 │ 1 │ 200 │
│ 202309 │ 100 │ 200 │
└────────┴─────────┴─────────────────────┘
Multilevel TTL Group by
CREATE TABLE test_ttl_group_by
(
`key` UInt32,
`ts` DateTime,
`value` UInt32,
`min_value` UInt32 DEFAULT value,
`max_value` UInt32 DEFAULT value
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(ts)
ORDER BY (key, toStartOfWeek(ts), toStartOfDay(ts), toStartOfHour(ts))
TTL
ts + interval 1 hour
GROUP BY key, toStartOfWeek(ts), toStartOfDay(ts), toStartOfHour(ts)
SET value = sum(value),
min_value = min(min_value),
max_value = max(max_value),
ts = min(toStartOfHour(ts)),
ts + interval 1 day
GROUP BY key, toStartOfWeek(ts), toStartOfDay(ts)
SET value = sum(value),
min_value = min(min_value),
max_value = max(max_value),
ts = min(toStartOfDay(ts)),
ts + interval 30 day
GROUP BY key, toStartOfWeek(ts)
SET value = sum(value),
min_value = min(min_value),
max_value = max(max_value),
ts = min(toStartOfWeek(ts));
SYSTEM STOP TTL MERGES test_ttl_group_by;
SYSTEM STOP MERGES test_ttl_group_by;
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() + number,
1
FROM numbers(100);
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() - interval 2 hour + number,
2
FROM numbers(100);
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() - interval 2 day + number,
3
FROM numbers(100);
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() - interval 2 month + number,
4
FROM numbers(100);
SELECT
toYYYYMMDD(ts) AS d,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY d
ORDER BY d;
┌────────d─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 20210616 │ 100 │ 400 │ 4 │ 4 │
│ 20210814 │ 100 │ 300 │ 3 │ 3 │
│ 20210816 │ 200 │ 300 │ 1 │ 2 │
└──────────┴─────────┴────────────┴────────────────┴────────────────┘
SYSTEM START TTL MERGES test_ttl_group_by;
SYSTEM START MERGES test_ttl_group_by;
OPTIMIZE TABLE test_ttl_group_by FINAL;
SELECT
toYYYYMMDD(ts) AS d,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY d
ORDER BY d;
┌────────d─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 20210613 │ 5 │ 400 │ 4 │ 4 │
│ 20210814 │ 5 │ 300 │ 3 │ 3 │
│ 20210816 │ 105 │ 300 │ 1 │ 2 │
└──────────┴─────────┴────────────┴────────────────┴────────────────┘
TTL GROUP BY + DELETE
CREATE TABLE test_ttl_group_by
(
`key` UInt32,
`ts` DateTime,
`value` UInt32,
`min_value` UInt32 DEFAULT value,
`max_value` UInt32 DEFAULT value
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(ts)
ORDER BY (key, toStartOfDay(ts))
TTL
ts + interval 180 day,
ts + interval 30 day
GROUP BY key, toStartOfDay(ts)
SET value = sum(value),
min_value = min(min_value),
max_value = max(max_value),
ts = min(toStartOfDay(ts));
-- stop merges to demonstrate data before / after
-- a rolling up
SYSTEM STOP TTL MERGES test_ttl_group_by;
SYSTEM STOP MERGES test_ttl_group_by;
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() + number,
1
FROM numbers(100);
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() - interval 60 day + number,
2
FROM numbers(100);
INSERT INTO test_ttl_group_by (key, ts, value)
SELECT
number % 5,
now() - interval 200 day + number,
3
FROM numbers(100);
SELECT
toYYYYMM(ts) AS m,
count(),
sum(value),
min(min_value),
max(max_value)
FROM test_ttl_group_by
GROUP BY m;
┌──────m─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 202101 │ 100 │ 300 │ 3 │ 3 │
│ 202106 │ 100 │ 200 │ 2 │ 2 │
│ 202108 │ 100 │ 100 │ 1 │ 1 │
└────────┴─────────┴────────────┴────────────────┴────────────────┘
SYSTEM START TTL MERGES test_ttl_group_by;
SYSTEM START MERGES test_ttl_group_by;
OPTIMIZE TABLE test_ttl_group_by FINAL;
┌──────m─┬─count()─┬─sum(value)─┬─min(min_value)─┬─max(max_value)─┐
│ 202106 │ 5 │ 200 │ 2 │ 2 │
│ 202108 │ 100 │ 100 │ 1 │ 1 │
└────────┴─────────┴────────────┴────────────────┴────────────────┘
Also see the Altinity Knowledge Base pages on the MergeTree table engine family.