创建/配置
yaml
# Docker Compose definition for a single ClickHouse server.
# Data and server logs live in named volumes; both published ports are
# bound to 127.0.0.1, so they are reachable from the Docker host only.
version: '3.8'

services:
  clickhouse:
    image: docker.io/clickhouse/clickhouse-server
    container_name: clickhouse
    restart: always
    # Run the container process as UID:GID 101:101.
    user: "101:101"
    environment:
      CLICKHOUSE_DB: default
      # Both values fall back to "clickhouse" when the host variable is not
      # set; override them before using this outside of local testing.
      CLICKHOUSE_USER: ${CLICKHOUSE_USER:-clickhouse}
      CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-clickhouse}  # CHANGEME
    volumes:
      - clickhouse_data:/var/lib/clickhouse
      - clickhouse_logs:/var/log/clickhouse-server
    ports:
      # Quoted so the mappings are always read as strings.
      # 8123 is the HTTP port used by the healthcheck below;
      # 9000 is presumably the native TCP port (confirm).
      - "127.0.0.1:8123:8123"
      - "127.0.0.1:9000:9000"
    healthcheck:
      # Healthy once the HTTP /ping endpoint answers.
      test: wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 1s
    networks:
      - langfuse-network

volumes:
  clickhouse_data:
    driver: local
  clickhouse_logs:
    driver: local

networks:
  langfuse-network:
    name: langfuse-network
    driver: bridge

系统日志配置
在上面配置的 volumes 中增加以下配置项:
./config/disable_system_logs.xml:/etc/clickhouse-server/config.d/disable_system_logs.xml
- 冒号前面是本地文件路径,后面是映射到容器内部的配置文件路径
完全禁用
xml
<clickhouse>
    <!--
        Completely disable the system log tables listed below.
        Each element carries remove="1"; entries are in alphabetical order.
        NOTE(review): which log tables exist depends on the server version;
        confirm this list against the version actually deployed.
    -->
    <aggregated_zookeeper_log remove="1"/>
    <asynchronous_metric_log remove="1"/>
    <background_schedule_pool_log remove="1"/>
    <backup_log remove="1"/>
    <crash_log remove="1"/>
    <error_log remove="1"/>
    <latency_log remove="1"/>
    <metric_log remove="1"/>
    <opentelemetry_span_log remove="1"/>
    <part_log remove="1"/>
    <processors_profile_log remove="1"/>
    <query_log remove="1"/>
    <query_metric_log remove="1"/>
    <query_thread_log remove="1"/>
    <query_views_log remove="1"/>
    <session_log remove="1"/>
    <text_log remove="1"/>
    <trace_log remove="1"/>
    <zookeeper_connection_log remove="1"/>
    <zookeeper_log remove="1"/>
</clickhouse>

- 部分禁用
xml
<clickhouse>
    <!-- Disable only the log tables that take the most disk space. -->

    <!-- trace_log: the largest one. -->
    <trace_log remove="1"/>
    <!-- text_log: also large. -->
    <text_log remove="1"/>
    <!-- metric_log: metrics history. -->
    <metric_log remove="1"/>

    <!--
        query_log and part_log are kept, but with a TTL (recommended):
        rows are deleted 7 days after event_date, and the tables are
        partitioned by month of event_date.
    -->
    <query_log>
        <ttl>event_date + INTERVAL 7 DAY DELETE</ttl>
        <partition_by>toYYYYMM(event_date)</partition_by>
    </query_log>
    <part_log>
        <ttl>event_date + INTERVAL 7 DAY DELETE</ttl>
        <partition_by>toYYYYMM(event_date)</partition_by>
    </part_log>
</clickhouse>

数据查询
磁盘占用
-- Disk usage per table, largest first. Only active data parts are counted.
SELECT
    database,
    table,
    formatReadableSize(sum(bytes_on_disk)) AS total_size,
    sum(rows) AS total_rows,
    count() AS part_count
FROM system.parts
WHERE active
GROUP BY
    database,
    table
ORDER BY sum(bytes_on_disk) DESC;