Vector Configuration
Vector 是一个集日志与指标(metrics)采集功能于一身,可同时充当 agent、server 和消费者等多种角色的数据采集工具。它可以采集程序日志以及机器、容器的运行指标,并将数据转换处理后输出到下游系统,例如对象存储、ClickHouse、InfluxDB、OpenObserve(ES)等。
在我们的业务中,主要用它来采集程序运行日志、业务数据日志以及机器监控指标,尤其是在多个 VPC 内完成相关日志和指标的采集、中继和持久化等任务。
config files
json日志
vector json日志配置文件
# __ __ __
# \ \ / / / /
# \ V / / /
# \_/ \/
#
# V E C T O R
# Configuration
#
# ------------------------------------------------------------------------------
# Website: https://vector.dev
# Docs: https://vector.dev/docs
# Chat: https://chat.vector.dev
# ------------------------------------------------------------------------------
# Uncomment to keep Vector's data in a non-default directory:
# data_dir: "/var/lib/vector"

# Data logs: the fields nested under `message` only need to be lifted to the
# top level.

sources:
  # Random Syslog-formatted demo logs.
  dummy_logs:
    type: "demo_logs"
    format: "syslog"
    interval: 1

  # JSON data files picked up from disk.
  datafile:
    type: "file"
    # Alternative glob: "/var/log/**/*.log"
    include:
      - "/tmp/tmpdata/*.json"
    # NOTE(review): `start_at_beginning` is believed to be deprecated in favor
    # of `read_from` / `ignore_checkpoints` - confirm against the Vector
    # version in use.
    start_at_beginning: true

# Parse the Syslog demo logs.
# Vector Remap Language reference: https://vrl.dev
transforms:
  parse_logs:
    type: "remap"
    inputs:
      - "dummy_logs"
    source: |
      . = parse_syslog!(string!(.message))

sinks:
  # Pretty-print events to stdout.
  print:
    type: "console"
    # Either `datafile` or `parse_logs` can be wired in here.
    inputs:
      - "datafile"
    encoding:
      codec: "json"
      json:
        pretty: true

  # Archive the data files to object storage through the S3-compatible sink.
  ossdata:
    type: "aws_s3"
    inputs:
      - "datafile"
    bucket: "BUCKNET_NAME"
    region: "oss-cn-shanghai"
    # Keep the trailing slash; without it the path gets concatenated with what
    # follows into one long folder name.
    # Other endpoint forms noted while experimenting:
    #   "s3://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/BUCKNET_NAME/brde/"
    #   "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/fccdjny123/"
    #   "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/"
    #   "BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/"
    endpoint: "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/brdedata/"
    # Placeholder credentials - replace before use.
    auth:
      access_key_id: "enter your ACCESS_KEY_id"
      secret_access_key: "enter your ACCESS_KEY"
    timezone: "Asia/Shanghai"
    # A machine id can be added here to separate directories per host.
    key_prefix: "date=%F/%H/%M/"
    # "json" for plain JSON files, "json.gz" for gzip-compressed JSON files.
    filename_extension: "json.gz"
    compression: "gzip"
    content_encoding: "gzip"
    content_type: "application/gzip"
    encoding:
      # "raw_message" writes only the log file content; "json" would also
      # include the other metadata.
      codec: "raw_message"
    batch:
      # Lower this while testing so the generated files show up in object
      # storage sooner.
      timeout_secs: 60
    healthcheck:
      enabled: false

  # Ship the data files to OpenObserve over HTTP.
  brdedata_openobserve:
    type: "http"
    inputs:
      - "datafile"
    # `brdedata` is the stream, i.e. the counterpart of documents in ES.
    # Default-stream alternative: "http://localhost:5080/api/default/default/_json"
    uri: "http://localhost:5080/api/default/brdedata/_json"
    method: "post"
    compression: "gzip"
    # Placeholder password - replace before use.
    auth:
      strategy: "basic"
      user: "wanghao@geetest.com"
      password: "enter your password"
    encoding:
      # "raw_message" causes problems here: OpenObserve cannot ingest the data.
      codec: "json"
      # timestamp_format: "rfc3339"
    healthcheck:
      enabled: true

# Vector's GraphQL API (disabled by default)
# Uncomment to try it out with the `vector top` command or
# in your browser at http://localhost:8686
# api:
#   enabled: true
#   address: "127.0.0.1:8686"
# systemd unit that runs a Vector instance with /etc/vector/vector_data.yaml.
[Unit]
Description=Vector
Documentation=https://vector.dev
After=network-online.target
Requires=network-online.target

[Service]
# Validate the config before starting. The path is passed as an argument of
# the `validate` subcommand itself.
# NOTE(review): a root-level `--config` placed before the subcommand is
# believed not to reach `validate`, which would then check the default config
# path instead - confirm with `vector validate --help`.
ExecStartPre=/usr/bin/vector validate /etc/vector/vector_data.yaml
ExecStart=/usr/bin/vector --config /etc/vector/vector_data.yaml
# On reload: validate the same file first, then send SIGHUP to the main process.
ExecReload=/usr/bin/vector validate /etc/vector/vector_data.yaml
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
AmbientCapabilities=CAP_NET_BIND_SERVICE
# The leading "-" makes the environment file optional.
EnvironmentFile=-/etc/default/vector
# Since systemd 229, should be in [Unit] but in order to support systemd <229,
# it is also supported to have it here.
StartLimitInterval=10
StartLimitBurst=5

[Install]
WantedBy=multi-user.target
运行日志
vector 运行日志配置文件
# __ __ __
# \ \ / / / /
# \ V / / /
# \_/ \/
#
# V E C T O R
# Configuration
#
# ------------------------------------------------------------------------------
# Website: https://vector.dev
# Docs: https://vector.dev/docs
# Chat: https://chat.vector.dev
# ------------------------------------------------------------------------------
# [sources.my_file_source.multiline]
# start_pattern = '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
# mode = "halt_before"
# condition_pattern = '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
# timeout_ms = 1000
# Uncomment to keep Vector's data in a non-default directory:
# data_dir: "/var/lib/vector"

sources:
  # Random Syslog-formatted demo logs.
  dummy_logs:
    type: "demo_logs"
    format: "syslog"
    interval: 1

  # Application run logs picked up from disk.
  logfile:
    type: "file"
    # Alternative glob: "/var/log/**/*.log"
    include:
      - "/tmp/tmplog/*.log"
    # NOTE(review): `start_at_beginning` is believed to be deprecated in favor
    # of `read_from` / `ignore_checkpoints` - confirm against the Vector
    # version in use.
    start_at_beginning: true
    # Multi-line aggregation keyed on lines that begin with "[YYYY-MM-DD".
    multiline:
      mode: "halt_before"
      start_pattern: '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
      condition_pattern: '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
      timeout_ms: 1000

# Parse the Syslog demo logs.
# Vector Remap Language reference: https://vrl.dev
transforms:
  parse_logs:
    type: "remap"
    inputs:
      - "dummy_logs"
    source: |
      . = parse_syslog!(string!(.message))

sinks:
  # Pretty-print events to stdout.
  print:
    type: "console"
    # Either `logfile` or `parse_logs` can be wired in here.
    inputs:
      - "logfile"
    encoding:
      codec: "json"
      json:
        pretty: true

  # Ship the run logs to OpenObserve over HTTP.
  brdelog_openobserve:
    type: "http"
    inputs:
      - "logfile"
    # `brdelog` is the stream, i.e. the counterpart of documents in ES.
    # Default-stream alternative: "http://localhost:5080/api/default/default/_json"
    uri: "http://localhost:5080/api/default/brdelog/_json"
    method: "post"
    compression: "gzip"
    # Placeholder password - replace before use.
    auth:
      strategy: "basic"
      user: "wanghao@geetest.com"
      password: "enter your password"
    encoding:
      codec: "json"
      timestamp_format: "rfc3339"
    healthcheck:
      enabled: true

  # Archive the run logs to object storage through the S3-compatible sink.
  osslog:
    type: "aws_s3"
    inputs:
      - "logfile"
    bucket: "BUCKNET_NAME"
    region: "oss-cn-shanghai"
    # Keep the trailing slash; without it the path gets concatenated with what
    # follows into one long folder name.
    # Other endpoint forms noted while experimenting:
    #   "s3://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/BUCKNET_NAME/brde/"
    #   "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/fccdjny123/"
    #   "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/"
    #   "BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/"
    endpoint: "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/brdelog/"
    # Placeholder credentials - replace before use.
    auth:
      access_key_id: "enter your ACCESS_KEY_id"
      secret_access_key: "enter your ACCESS_KEY"
    timezone: "Asia/Shanghai"
    # Hour-level alternative: "date=%F/hour=%H/"
    # A machine id can be added here to separate directories per host.
    key_prefix: "date=%F/%H/%M/"
    encoding:
      codec: "raw_message"
    batch:
      timeout_secs: 60
    healthcheck:
      enabled: false

# Vector's GraphQL API (disabled by default)
# Uncomment to try it out with the `vector top` command or
# in your browser at http://localhost:8686
# api:
#   enabled: true
#   address: "127.0.0.1:8686"
# systemd unit that runs a Vector instance with /etc/vector/vector_log.yaml.
[Unit]
Description=Vector
Documentation=https://vector.dev
After=network-online.target
Requires=network-online.target

[Service]
# Validate the config before starting. The path is passed as an argument of
# the `validate` subcommand itself.
# NOTE(review): a root-level `--config` placed before the subcommand is
# believed not to reach `validate`, which would then check the default config
# path instead - confirm with `vector validate --help`.
ExecStartPre=/usr/bin/vector validate /etc/vector/vector_log.yaml
ExecStart=/usr/bin/vector --config /etc/vector/vector_log.yaml
# On reload: validate the same file first, then send SIGHUP to the main process.
ExecReload=/usr/bin/vector validate /etc/vector/vector_log.yaml
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
AmbientCapabilities=CAP_NET_BIND_SERVICE
# The leading "-" makes the environment file optional.
EnvironmentFile=-/etc/default/vector
# Since systemd 229, should be in [Unit] but in order to support systemd <229,
# it is also supported to have it here.
StartLimitInterval=10
StartLimitBurst=5

[Install]
WantedBy=multi-user.target
deploy best practice
Create env file
Create systemd service file
Copy the systemd service files to the /usr/lib/systemd/system/ directory.
sudo cp /home/ryefccd/.vector/etc/systemd/vector.service /usr/lib/systemd/system/
Enable service
ryefccd@republic:~/.vector$ ls /lib/systemd/system |grep vector
vector_data.service
vector_log.service
systemctl list-unit-files
systemctl daemon-reload
systemctl list-unit-files
systemctl enable vector_data.service
systemctl enable vector_log.service
运行结果示例:
systemctl daemon-reload
ryefccd@republic:~/.vector$ systemctl enable vector_data.service
Created symlink /etc/systemd/system/multi-user.target.wants/vector_data.service → /lib/systemd/system/vector_data.service.
ryefccd@republic:~/.vector$ systemctl enable vector_log.service
Created symlink /etc/systemd/system/multi-user.target.wants/vector_log.service → /lib/systemd/system/vector_log.service.
start
systemctl start vector_data.service
systemctl start vector_log.service
stop
# Stop both units used in this guide (vector_data and vector_log).
systemctl stop vector_data.service
systemctl stop vector_log.service
status
# Show the state of both units used in this guide (vector_data and vector_log).
systemctl status vector_data.service
systemctl status vector_log.service
Test service
资料
multiline-messages
运行日志中多行日志配置.