duckdb install
windows 系统安装
下载地址:https://github.com/duckdb/duckdb/releases/download/v1.1.3/duckdb_cli-windows-amd64.zip
下载后进行解压:
点击进入解压目录,双击运行duckdbe.exe 程序:
运行成功后:
文件格式
json
json 导入
json 导出
csv
csv 导入
csv 导出
Excel
Excel导入
# 直接读取数据
SELECT * FROM st_read('output.xlsx');
# 读取数据并创建表格导入数据
CREATE TABLE new_tbl AS SELECT * FROM st_read('output.xlsx');
select * from new_tbl;
Excel导出
# 安装并导入扩展
INSTALL spatial;
LOAD spatial;
# 导出数据
COPY (SELECT * FROM regs) TO 'output.xlsx' WITH (FORMAT GDAL, DRIVER 'xlsx');
parquet
parquet 导入
parquet 导出
对象存储
ali oss
兼容 s3 协议的对象存储使用如下配置访问:
```SQL
-- 配置 决策引擎日志访问权限
CREATE PERSISTENT SECRET oss_brde (
TYPE S3,
KEY_ID 'OSS ACCESS ID',
SECRET 'OSS ACCESS KEY',
endpoint 'oss-cn-shanghai.aliyuncs.com',
REGION 'oss-cn-shanghai',
SCOPE 's3://bucket_name'
);
-- 查询 secret 配置, 此时应该看到 name(第一列) 为 oss_brde 的配置.
select * from duckdb_secrets();
-- 删除 secret 配置
DROP PERSISTENT SECRET oss_brde;
配置完成后即可访问 ali oss 上的数据:
select * from read_json('s3://brde/area_base_demo.json');
load data
数据明细结构如下所示:
// 这是其中一条log日志 主要对shadow_output:[{},{},{}]进行展开
{
"_timestamp":1732518749379000,
"agent":"{"ephemeral_id":"17fee4a3-9865-4542-8779-ddd5bb492dd7",
"id":"38db1be3-001e-4193-b433-f6405275e68d",
"name":"brde-59f77f89b-7bmft",
"type":"filebeat",
"version":"8.14.1,
"ecs":"{"version":"8.0.0"}",
"event_day":"2024-11-25",
"event_hour":"15:00",
"host":"{"name":"brde-59f77f89b-7bmft"}",
"id":"1efaafca1bd56d1ca9b9059487b1df63",
"input": {}
"log":"{"file":{"path":"/log/brde_datalog/DATA_LOG_6.log"},"offset":2693589}",
"proj_id":"proj_e8033f581ead4852",
"shadow_output":"[{"performance":"6.851766ms","result":{"reason":"账号黑单","result":"reject"},"rule_id":"rule_5d87861d10b84380","rule_name":"多彩规则1"}]",
"user_id":"82b3c08950464a23a72d3c5c6403730d"
}
# 解压dchk.zip后查看
tree -L 1 dchk
# 有三天的数据
dchk
├── 22
├── 23
└── 24
# 进入duckdb shell
./duckdb
进入 duckdb shell 后,
-- 查看所有数据
SELECT * FROM 'dchk/**/*.parquet';
duckdb 数据分析
创建表
-- 导入数据并创建数据表tbl(dchk下所有.parquet)
CREATE TABLE tbl as SELECT * FROM 'dchk/**/*.parquet';
查看表字段
DESCRIBE tbl;
┌───────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│ column_name │ column_type │ null │ key │ default │ extra │
│ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │
├───────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ _timestamp │ BIGINT │ YES │ │ │ │
│ agent │ VARCHAR │ YES │ │ │ │
│ ecs │ VARCHAR │ YES │ │ │ │
│ event_day │ VARCHAR │ YES │ │ │ │
│ event_hour │ VARCHAR │ YES │ │ │ │
│ host │ VARCHAR │ YES │ │ │ │
│ id │ VARCHAR │ YES │ │ │ │
│ input │ VARCHAR │ YES │ │ │ │
│ log │ VARCHAR │ YES │ │ │ │
│ proj_id │ VARCHAR │ YES │ │ │ │
│ shadow_output │ VARCHAR │ YES │ │ │ │
│ user_id │ VARCHAR │ YES │ │ │ │
├───────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┤
│ 12 rows 6 columns │
└─────────────────────────────────────────────────────────────────────┘
查询表数据
此例子主要展示 duckdb 分析 json 对象和数组嵌套json对象。
-- 查询所有字段
select * from tbl limit 3;
对 shadow_output 这种复杂的 json 字段进行分析
select shadow_output from tbl limit 3;
┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ shadow_output │
│ varchar │
├───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ [{"performance":"16.665342ms","result":{"result":"pass"},"rule_id":"rule_5d87861d10b84380","rule_name":"多彩规则1"}] │
│ [{"performance":"16.322001ms","result":{"result":"pass"},"rule_id":"rule_5d87861d10b84380","rule_name":"多彩规则1"}] │
│ [{"performance":"6.653507ms","result":{"result":"reject"},"rule_id":"rule_5d87861d10b84380","rule_name":"多彩规则1"}] │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
-- 2 取result结果
select shadow_output->'$[0].result.result',id from tbl limit 3;
┌─────────────────────────────────────────┬──────────────────────────────────┐
│ (shadow_output -> '$[0].result.result') │ id │
│ json │ varchar │
├─────────────────────────────────────────┼──────────────────────────────────┤
│ "pass" │ 1efa91d27e49686aaccd6e27b025db76 │
│ "pass" │ 1efa91d2f1736e82b76809244a0d3667 │
│ "reject" │ 1efa91d383736c6bab3dfd3ebb80ba8a │
└─────────────────────────────────────────┴──────────────────────────────────┘
分组聚合
-- 3 对结果进行group
select shadow_output->'$[0].result.result' as _res,count(*) from tbl group by 1;
┌──────────┬──────────────┐
│ _res │ count_star() │
│ json │ int64 │
├──────────┼──────────────┤
│ "pass" │ 274197 │
│ "reject" │ 146436 │
└──────────┴──────────────┘
宽格式转长格式
一般是把嵌套的数据变成多行数据,方便聚合统计分析.
D select unnest(shadow_output->'$[*].result.result') as res,id from tbl limit 10;
┌──────────┬──────────────────────────────────┐
│ res │ id │
│ json │ varchar │
├──────────┼──────────────────────────────────┤
│ "pass" │ 1efa91d27e49686aaccd6e27b025db76 │
│ "pass" │ 1efa91d2f1736e82b76809244a0d3667 │
│ "reject" │ 1efa91d383736c6bab3dfd3ebb80ba8a │
│ "pass" │ 1efa91d38f286b5b9a72d4b34ff8c86d │
│ "reject" │ 1efa91d5e2ae6b24ae4d59028badd59b │
│ "reject" │ 1efa91d6f9c267beae69c1ec0b504936 │
│ "pass" │ 1efa91d7f5bd6c0ba5e6e9310d870420 │
│ "pass" │ 1efa91d7fd156dd4a5ddcd73f1ed49f9 │
│ "reject" │ 1efa91d805e36491b5e7e708e57cbe62 │
│ "reject" │ 1efa91d80fed6e0f863660f457778c84 │
├──────────┴──────────────────────────────────┤
│ 10 rows 2 columns │
└─────────────────────────────────────────────┘
-- _timestamp 字段是微秒(us), 所以需要除以 1000000 转化为秒
D select date_trunc('hour',to_timestamp((_timestamp/1000000))) as hour,shadow_output->'$[*].result.result' as res from tbl limit 3;
┌──────────────────────────┬────────────┐
│ hour │ res │
│ timestamp with time zone │ json[] │
├──────────────────────────┼────────────┤
│ 2024-11-23 06:00:00+08 │ ["pass"] │
│ 2024-11-23 06:00:00+08 │ ["pass"] │
│ 2024-11-23 06:00:00+08 │ ["reject"] │
└──────────────────────────┴────────────┘
-- unnest 将嵌套的数组变成多行(宽格式转成长格式)
D select date_trunc('hour',to_timestamp((_timestamp/1000000))) as hour,id,unnest(shadow_output->'$[*].result.result') as res from tbl limit 2;
┌──────────────────────────┬──────────────────────────────────┬────────┐
│ hour │ id │ res │
│ timestamp with time zone │ varchar │ json │
├──────────────────────────┼──────────────────────────────────┼────────┤
│ 2024-11-23 06:00:00+08 │ 1efa91d27e49686aaccd6e27b025db76 │ "pass" │
│ 2024-11-23 06:00:00+08 │ 1efa91d2f1736e82b76809244a0d3667 │ "pass" │
└──────────────────────────┴──────────────────────────────────┴────────┘
-- 对长格式进行分组绝活
D with unnest_tbl as (select date_trunc('hour',to_timestamp((_timestamp/1000000))) as hour,id,unnest(shadow_output->'$[*].result.result') as res from tbl)
select hour,res,count(*) from unnest_tbl group by 1, 2 limit 5;
┌──────────────────────────┬──────────┬──────────────┐
│ hour │ res │ count_star() │
│ timestamp with time zone │ json │ int64 │
├──────────────────────────┼──────────┼──────────────┤
│ 2024-11-24 14:00:00+08 │ "reject" │ 28096 │
│ 2024-11-24 15:00:00+08 │ "pass" │ 4103 │
│ 2024-11-24 17:00:00+08 │ "pass" │ 1256 │
│ 2024-11-24 21:00:00+08 │ "pass" │ 1836 │
│ 2024-11-24 22:00:00+08 │ "reject" │ 868 │
└──────────────────────────┴──────────┴──────────────┘
duckdb 练习
Querying Parquet with Precision Using DuckDB
-- 创建
CREATE TABLE tbl2 (id varchar,j JSON);
-- 插入
INSERT INTO tbl2 VALUES ('1efa91d27e49686aaccd6e27b025db76','[{"performance":"16.665342ms","result":{"res":"pass"},"rule_id":"rule_5d87861d10b84381","rule_name":"规则1"},{"performance":"16.322001ms","result":{"res":"pass"},"rule_id":"rule_5d87861d10b84382","rule_name":"规则2"},{"performance":"6.653507ms","result":{"res":"reject"},"rule_id":"rule_5d87861d10b84383","rule_name":"规则3"}]');
-- 查询
select * from tbl2;
──────────────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────
│ id │ j
│ varchar │ json
├──────────────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────
│ 1efa91d27e49686aac… │ [{"performance":"16.665342ms","result":{"res":"pass"},"rule_id":"rule_5d87861d10b84381","rule_name":"规则1"},
{"performance":"16.322001ms","result":{"res":"pass"},"rule_id":"rule_5d87861d10b84382","rule_name":"规则2"},
{"performance":"6.653507ms","result":{"res":"reject"},"rule_id":"rule_5d87861d10b84383","rule_name":" 规则3"}]
└──────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────
-- 查询2
select j->'$[*].result.res' from tbl2;
┌─────────────────────────────┐
│ (j -> '$[*].result.res') │
│ json[] │
├─────────────────────────────┤
│ ["pass", "pass", "reject"] │
└─────────────────────────────┘
-- 利用unnest 展开
select id, unnest(j->'$[*].result.res')from tbl2;
┌──────────────────────────────────┬──────────────────────────────────┐
│ id │ unnest((j -> '$[*].result.res')) │
│ varchar │ json │
├──────────────────────────────────┼──────────────────────────────────┤
│ 1efa91d27e49686aaccd6e27b025db76 │ "pass" │
│ 1efa91d27e49686aaccd6e27b025db76 │ "pass" │
│ 1efa91d27e49686aaccd6e27b025db76 │ "reject" │
└──────────────────────────────────┴──────────────────────────────────┘
测试数据
{ "store": {
"book": [
{ "category": "reference",
"author": "Nigel Rees",
"title": "Sayings of the Century",
"price": 8.95
},
{ "category": "fiction",
"author": "Evelyn Waugh",
"title": "Sword of Honour",
"price": 12.99
},
{ "category": "fiction",
"author": "Herman Melville",
"title": "Moby Dick",
"isbn": "0-553-21311-3",
"price": 8.99
},
{ "category": "fiction",
"author": "J. R. R. Tolkien",
"title": "The Lord of the Rings",
"isbn": "0-395-19395-8",
"price": 22.99
}
],
"bicycle": {
"color": "red",
"price": 19.95
}
}
}
CREATE TABLE tbl3 (j JSON);
insert into tbl3 values ('{"store":{"book":[{"category":"reference","author":"Nigel Rees","title":"Sayings of the Century","price":8.95},{"category":"fiction","author":"Evelyn Waugh","title":"Sword of Honour","price":12.99},{"category":"fiction","author":"Herman Melville","title":"Moby Dick","isbn":"0-553-21311-3","price":8.99},{"category":"fiction","author":"J. R. R. Tolkien","title":"The Lord of the Rings","isbn":"0-395-19395-8","price":22.99}],"bicycle":{"color":"red","price":19.95}}}');
select j->'$.store.book[*].author' from tbl3;
select j->'$..author' from tbl3;
select j->'$.store.*' from tbl3;
select j->'$.store..price' from tbl3;
select j->'$..book[2]' from tbl3;
select j->'$..book[#-1]' from tbl3;
jsonpath
不是所有的 jsonpath 路径都支持。支持的部分如下:
- $.store.book[*].author
- $..author
- $.store.*
- $.store..price
- $..book[2]
- $..book[#-1]
滑动窗口计算
测试数据
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "129.555575ms", "result": {"ip_user_ucnt": {"v": "13309671584", "gpv": 1, "gidle": 0, "uv": 1, "pv": 1, "vidle": 0, "group": "119.147.71.133", "timestamp": "1734425282"}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4"], "ipcnt": {"timestamp": "1734425282", "v": "119.147.71.133", "counter": 11, "idle": 3043995}, "ip": "119.147.71.133", "user": "13309671584", "send_email": {"message": "success"}, "user_ip_ucnt": {"group": "13309671584", "gpv": 1, "pv": 1, "v": "119.147.71.133", "vidle": 0, "gidle": 0, "uv": 1, "timestamp": "1734425282"}, "white_list": {"result": false}, "black_list": {"result": false}}, "trace_id": "1efbc53a00536fa5b7211ecc6351715f"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "137.862074ms", "result": {"user_ip_ucnt": {"uv": 1, "gpv": 1, "group": "13625643658", "timestamp": "1734425291", "pv": 1, "gidle": 0, "vidle": 0, "v": "119.147.71.133"}, "ip_user_ucnt": {"group": "119.147.71.133", "timestamp": "1734425291", "uv": 2, "gidle": 9330, "gpv": 2, "pv": 1, "vidle": 0, "v": "13625643658"}, "send_email": {"message": "success"}, "black_list": {"result": false}, "ip": "119.147.71.133", "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "ipcnt": {"idle": 9330, "counter": 12, "timestamp": "1734425291", "v": "119.147.71.133"}, "white_list": {"result": false}, "user": "13625643658"}, "trace_id": "1efbc53a59636184873ec5ce0b6c9f89"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "121.025392ms", "result": {"black_list": {"result": false}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "user": "13309671584", "ip_user_ucnt": {"timestamp": "1734425299", "pv": 2, "v": "13309671584", "vidle": 17640, "gpv": 3, "group": "119.147.71.133", "uv": 2, "gidle": 8310}, "ipcnt": {"counter": 13, "timestamp": "1734425299", "idle": 8310, "v": "119.147.71.133"}, "ip": "119.147.71.133", "send_email": {"message": "success"}, "user_ip_ucnt": {"uv": 1, "pv": 2, "timestamp": "1734425299", "group": "13309671584", "vidle": 17640, "v": "119.147.71.133", "gidle": 17640, "gpv": 2}, "white_list": {"result": false}}, "trace_id": "1efbc53aa87b69efaa39e3d5badee095"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "5.245953ms", "result": {"white_list": {"result": false}, "ip_user_ucnt": {"gidle": 0, "v": "13625643658", "pv": 1, "timestamp": "1734425308", "vidle": 0, "uv": 1, "group": "111.85.210.139", "gpv": 1}, "user_ip_ucnt": {"pv": 1, "v": "111.85.210.139", "gpv": 2, "vidle": 0, "uv": 2, "group": "13625643658", "gidle": 16606, "timestamp": "1734425308"}, "ipcnt": {"idle": 3056539, "v": "111.85.210.139", "counter": 5, "timestamp": "1734425308"}, "user": "13625643658", "ip": "111.85.210.139", "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "black_list": {"result": false}}, "trace_id": "1efbc53af67d6cd8acd0ebf106fe91ba"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "5.340705ms", "result": {"ip_user_ucnt": {"timestamp": "1734425311", "pv": 1, "vidle": 0, "group": "111.85.210.139", "v": "17707115955", "uv": 2, "gpv": 2, "gidle": 3165}, "ip": "111.85.210.139", "user": "17707115955", "user_ip_ucnt": {"vidle": 0, "pv": 1, "v": "111.85.210.139", "gpv": 1, "group": "17707115955", "timestamp": "1734425311", "uv": 1, "gidle": 0}, "white_list": {"result": false}, "black_list": {"result": false}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "ipcnt": {"idle": 3165, "v": "111.85.210.139", "counter": 6, "timestamp": "1734425311"}}, "trace_id": "1efbc53b14aa66bb9504d2afe03db53a"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "122.779011ms", "result": {"register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "black_list": {"result": false}, "ip": "119.147.71.133", "ip_user_ucnt": {"pv": 3, "group": "119.147.71.133", "gidle": 17669, "gpv": 4, "vidle": 17669, "uv": 2, "timestamp": "1734425317", "v": "13309671584"}, "send_email": {"message": "success"}, "user": "13309671584", "user_ip_ucnt": {"vidle": 17669, "uv": 1, "gidle": 17669, "timestamp": "1734425317", "v": "119.147.71.133", "group": "13309671584", "pv": 3, "gpv": 3}, "white_list": {"result": false}, "ipcnt": {"timestamp": "1734425317", "idle": 17669, "counter": 14, "v": "119.147.71.133"}}, "trace_id": "1efbc53b50fd64c2bcc603910b1151c5"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "5.497546ms", "result": {"user_ip_ucnt": {"pv": 1, "gpv": 4, "gidle": 8276, "timestamp": "1734425325", "uv": 2, "vidle": 0, "v": "111.85.210.139", "group": "13309671584"}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "white_list": {"result": false}, "black_list": {"result": false}, "ip_user_ucnt": {"group": "111.85.210.139", "pv": 1, "gpv": 3, "uv": 3, "gidle": 14484, "timestamp": "1734425325", "v": "13309671584", "vidle": 0}, "ip": "111.85.210.139", "ipcnt": {"v": "111.85.210.139", "timestamp": "1734425325", "idle": 14484, "counter": 7}, "user": "13309671584"}, "trace_id": "1efbc53b9ecd683f878b56a2a86c6e44"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "7.012836ms", "result": {"register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "white_list": {"result": false}, "black_list": {"result": false}, "user": "13309671584", "ip": "111.85.210.139", "ip_user_ucnt": {"pv": 2, "timestamp": "1734425332", "uv": 3, "gidle": 6192, "gpv": 4, "vidle": 6192, "group": "111.85.210.139", "v": "13309671584"}, "user_ip_ucnt": {"pv": 2, "timestamp": "1734425332", "uv": 2, "v": "111.85.210.139", "gidle": 6192, "gpv": 5, "vidle": 6192, "group": "13309671584"}, "ipcnt": {"v": "111.85.210.139", "idle": 6192, "counter": 8, "timestamp": "1734425332"}}, "trace_id": "1efbc53bd9da6af9ba9e9f4fe8223f1e"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "6.075249ms", "result": {"black_list": {"result": false}, "white_list": {"result": false}, "ip": "111.85.210.139", "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "user_ip_ucnt": {"timestamp": "1734425337", "group": "13625643658", "v": "111.85.210.139", "pv": 2, "vidle": 29004, "gidle": 29004, "gpv": 3, "uv": 2}, "ip_user_ucnt": {"group": "111.85.210.139", "uv": 3, "v": "13625643658", "gpv": 5, "vidle": 29004, "pv": 2, "gidle": 5163, "timestamp": "1734425337"}, "user": "13625643658", "ipcnt": {"timestamp": "1734425337", "counter": 9, "idle": 5163, "v": "111.85.210.139"}}, "trace_id": "1efbc53c0b18641c9442b643aa01f46c"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "5.739437ms", "result": {"ipcnt": {"v": "111.85.210.139", "counter": 10, "timestamp": "1734425343", "idle": 6185}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "white_list": {"result": false}, "user": "13625643658", "ip_user_ucnt": {"uv": 3, "pv": 3, "group": "111.85.210.139", "timestamp": "1734425343", "gpv": 6, "gidle": 6185, "v": "13625643658", "vidle": 6185}, "black_list": {"result": false}, "ip": "111.85.210.139", "user_ip_ucnt": {"v": "111.85.210.139", "pv": 3, "gidle": 6185, "uv": 2, "gpv": 4, "group": "13625643658", "vidle": 6185, "timestamp": "1734425343"}}, "trace_id": "1efbc53c46136638b0cdb695d9c9b9c3"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "123.696608ms", "result": {"user_ip_ucnt": {"v": "111.85.210.139", "gpv": 2, "uv": 1, "pv": 2, "gidle": 37203, "timestamp": "1734425348", "vidle": 37203, "group": "17707115955"}, "black_list": {"result": false}, "user": "17707115955", "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "ip": "111.85.210.139", "white_list": {"result": false}, "ip_user_ucnt": {"gidle": 5179, "timestamp": "1734425348", "uv": 3, "v": "17707115955", "pv": 2, "vidle": 37203, "gpv": 7, "group": "111.85.210.139"}, "ipcnt": {"counter": 11, "timestamp": "1734425348", "v": "111.85.210.139", "idle": 5179}, "send_email": {"message": "success"}}, "trace_id": "1efbc53c78996cc39a7f349ac2cc693a"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "125.573966ms", "result": {"white_list": {"result": false}, "black_list": {"result": false}, "user_ip_ucnt": {"gidle": 18837, "vidle": 33305, "gpv": 5, "pv": 3, "group": "13309671584", "uv": 1, "v": "119.147.71.133", "timestamp": "1734425350"}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4"], "ip_user_ucnt": {"vidle": 33305, "gidle": 33305, "group": "119.147.71.133", "timestamp": "1734425350", "gpv": 4, "uv": 1, "pv": 3, "v": "13309671584"}, "ipcnt": {"v": "119.147.71.133", "timestamp": "1734425350", "idle": 33305, "counter": 14}, "ip": "119.147.71.133", "send_email": {"message": "success"}, "user": "13309671584"}, "trace_id": "1efbc53c8ea06dadab4150f27f340ce0"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "123.467784ms", "result": {"send_email": {"message": "success"}, "user_ip_ucnt": {"v": "119.147.71.133", "vidle": 0, "gpv": 3, "group": "17707115955", "uv": 2, "gidle": 4608, "timestamp": "1734425353", "pv": 1}, "black_list": {"result": false}, "ip": "119.147.71.133", "ipcnt": {"idle": 2298, "counter": 15, "timestamp": "1734425353", "v": "119.147.71.133"}, "user": "17707115955", "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "white_list": {"result": false}, "ip_user_ucnt": {"gpv": 5, "timestamp": "1734425353", "pv": 1, "gidle": 2298, "group": "119.147.71.133", "v": "17707115955", "vidle": 0, "uv": 2}}, "trace_id": "1efbc53ca48c68e79a200170050c0efd"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "122.802121ms", "result": {"ip_user_ucnt": {"gpv": 8, "group": "111.85.210.139", "v": "13625643658", "vidle": 11088, "gidle": 5909, "uv": 3, "timestamp": "1734425354", "pv": 4}, "user_ip_ucnt": {"pv": 4, "v": "111.85.210.139", "uv": 1, "gpv": 4, "vidle": 11088, "timestamp": "1734425354", "gidle": 11088, "group": "13625643658"}, "ip": "111.85.210.139", "user": "13625643658", "ipcnt": {"counter": 12, "timestamp": "1734425354", "idle": 5909, "v": "111.85.210.139"}, "black_list": {"result": false}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "white_list": {"result": false}, "send_email": {"message": "success"}}, "trace_id": "1efbc53cb0ef673dae62bd6461e2e69f"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "126.948236ms", "result": {"user_ip_ucnt": {"gpv": 5, "group": "13309671584", "timestamp": "1734425355", "vidle": 23735, "pv": 3, "uv": 1, "v": "111.85.210.139", "gidle": 4898}, "white_list": {"result": false}, "ipcnt": {"v": "111.85.210.139", "timestamp": "1734425355", "idle": 1299, "counter": 13}, "black_list": {"result": false}, "user": "13309671584", "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "send_email": {"message": "success"}, "ip_user_ucnt": {"pv": 3, "uv": 3, "group": "111.85.210.139", "gpv": 9, "gidle": 1299, "timestamp": "1734425355", "vidle": 23735, "v": "13309671584"}, "ip": "111.85.210.139"}, "trace_id": "1efbc53cbd5e6262956460b2f6af8615"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "115.331404ms", "result": {"register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "send_email": {"message": "success"}, "black_list": {"result": false}, "ip": "111.85.210.139", "ipcnt": {"timestamp": "1734425357", "idle": 1285, "counter": 14, "v": "111.85.210.139"}, "user_ip_ucnt": {"uv": 1, "gpv": 6, "gidle": 1285, "group": "13309671584", "timestamp": "1734425357", "pv": 4, "v": "111.85.210.139", "vidle": 1285}, "white_list": {"result": false}, "ip_user_ucnt": {"group": "111.85.210.139", "uv": 3, "gidle": 1285, "timestamp": "1734425357", "pv": 4, "vidle": 1285, "gpv": 10, "v": "13309671584"}, "user": "13309671584"}, "trace_id": "1efbc53cc98367f6b6916f049c614ba7"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "118.008882ms", "result": {"ip_user_ucnt": {"gpv": 11, "timestamp": "1734425361", "gidle": 4269, "pv": 3, "v": "17707115955", "vidle": 12762, "group": "111.85.210.139", "uv": 3}, "user": "17707115955", "white_list": {"result": false}, "ipcnt": {"counter": 15, "idle": 4269, "timestamp": "1734425361", "v": "111.85.210.139"}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "black_list": {"result": false}, "send_email": {"message": "success"}, "ip": "111.85.210.139", "user_ip_ucnt": {"gpv": 4, "uv": 2, "gidle": 8154, "vidle": 12762, "v": "111.85.210.139", "timestamp": "1734425361", "pv": 3, "group": "17707115955"}}, "trace_id": "1efbc53cf241608aaadd22b694831787"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "130.230356ms", "result": {"register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c", "\u5355\u4e00\u7528\u6237\u4e0b\u591a\u4e2aip\u6ce8\u518c"], "user_ip_ucnt": {"timestamp": "1734425364", "uv": 2, "gpv": 5, "v": "119.147.71.133", "group": "13625643658", "pv": 1, "vidle": 0, "gidle": 10143}, "ip": "119.147.71.133", "send_email": {"message": "success"}, "ip_user_ucnt": {"v": "13625643658", "gpv": 5, "group": "119.147.71.133", "pv": 1, "timestamp": "1734425364", "gidle": 11444, "uv": 3, "vidle": 0}, "user": "13625643658", "ipcnt": {"counter": 16, "timestamp": "1734425364", "v": "119.147.71.133", "idle": 11444}, "black_list": {"result": false}, "white_list": {"result": false}}, "trace_id": "1efbc53d11bf6f1d8b92b3a3fcb0e6e9"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "117.003039ms", "result": {"ip": "119.147.71.133", "ip_user_ucnt": {"v": "17707115955", "vidle": 18732, "gidle": 7288, "uv": 3, "gpv": 6, "group": "119.147.71.133", "pv": 2, "timestamp": "1734425371"}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "black_list": {"result": false}, "send_email": {"message": "success"}, "user_ip_ucnt": {"uv": 1, "v": "119.147.71.133", "pv": 2, "gidle": 10578, "timestamp": "1734425371", "vidle": 18732, "gpv": 4, "group": "17707115955"}, "white_list": {"result": false}, "user": "17707115955", "ipcnt": {"timestamp": "1734425371", "counter": 17, "idle": 7288, "v": "119.147.71.133"}}, "trace_id": "1efbc53d571f68cabb0aefdf22b39e4a"}, "message": "version 1.0"}
{"status": 0, "data": {"rule_id": "rule_ce53aadda8d241cc", "rule_name": "\u6ce8\u518c\u793a\u4f8b", "performance": "122.992814ms", "result": {"ip": "119.147.71.133", "ipcnt": {"idle": 4312, "counter": 18, "timestamp": "1734425376", "v": "119.147.71.133"}, "user_ip_ucnt": {"group": "13309671584", "vidle": 25342, "uv": 0, "gpv": 6, "v": "119.147.71.133", "timestamp": "1734425376", "gidle": 19159, "pv": 3}, "user": "13309671584", "black_list": {"result": false}, "white_list": {"result": false}, "register_result": ["\u6ce8\u518cip\u53d8\u66f4", "\u5355\u4e00ip\u4e0b\u591a\u4e2a\u7528\u6237\u6ce8\u518c"], "ip_user_ucnt": {"group": "119.147.71.133", "gidle": 4312, "uv": 2, "timestamp": "1734425376", "v": "13309671584", "pv": 3, "vidle": 25342, "gpv": 6}, "send_email": {"message": "success"}}, "trace_id": "1efbc53d804d69a9a04575adbf8afdae"}, "message": "version 1.0"}
数据清洗
执行下列sql 清洗数据,提取分析所需字段
CREATE TABLE regs as SELECT
to_timestamp((data ->>'$.result.ipcnt.timestamp')::int) as date,
CAST(data ->>'$.result.ipcnt.timestamp' as INTEGER) as op_timestamp,
data ->>'$.result.ip'as ip,data ->>'$.result.user'as user
from read_json("./register_rule_data.json");
数据清洗结果
select * from regs;
指标计算
执行下列sql 进行滑动窗口计算,计算对应指标数据
SELECT
date,
op_timestamp,
ip,
user,
COUNT(*) OVER gpv_rate AS gpv_l1m,COUNT(*) OVER pv_rate AS pv_l1m,
COUNT(distinct (user)) OVER uv_rate AS uv_l1m,
op_timestamp -lag(op_timestamp, 1) OVER gidle_res AS gidle,
op_timestamp -lag(op_timestamp, 1) OVER vidle_res AS vidle
FROM regs
WINDOW
gpv_rate AS (PARTITION BY ip ORDER BY date RANGE BETWEEN INTERVAL '1 minute' PRECEDING AND CURRENT ROW),
pv_rate AS (PARTITION BY (ip, user) ORDER BY date RANGE BETWEEN INTERVAL '1 minute' PRECEDING AND CURRENT ROW),
uv_rate AS (PARTITION BY ip ORDER BY date RANGE BETWEEN INTERVAL '1 minute' PRECEDING AND CURRENT ROW),
gidle_res AS (PARTITION BY ip ORDER BY date RANGE BETWEEN INTERVAL '1 minute' PRECEDING AND CURRENT ROW),
vidle_res AS (PARTITION BY (ip, user) ORDER BY date RANGE BETWEEN INTERVAL '1 minute' PRECEDING AND CURRENT ROW)
ORDER BY date;
计算结果