Vector Configuration

Vector 是一个集日志与 metrics 采集功能于一身,可同时充当 agent、server 和消费者等多种角色的数据搜集工具。它可以搜集程序日志以及机器、容器的运行指标,并将结果转换处理后输出至下游的对象存储、ClickHouse、InfluxDB、OpenObserve(ES)等系统。

在我们的业务中,Vector 主要用来搜集程序运行日志、业务数据日志以及机器监控指标,尤其是在多个 VPC 内完成相关日志和指标的搜集、中继和持久化。

config files

json日志

vector json日志配置文件

#                                    __   __  __
#                                    \ \ / / / /
#                                     \ V / / /
#                                      \_/  \/
#
#                                    V E C T O R
#                                   Configuration
#
# ------------------------------------------------------------------------------
# Website: https://vector.dev
# Docs: https://vector.dev/docs
# Chat: https://chat.vector.dev
# ------------------------------------------------------------------------------

# Change this to use a non-default directory for Vector data storage:
# data_dir: "/var/lib/vector"
# 数据日志, 只需要把 message 字段下字段提取到最顶层即可. 

# Sources: where events enter the pipeline.
sources:
  # Random syslog-formatted demo logs; only the parse_logs transform reads them.
  dummy_logs:
    type: "demo_logs"
    format: "syslog"
    interval: 1
  # Tails the business-data JSON files.
  datafile:
    type: "file"
    include:
      - "/tmp/tmpdata/*.json"  # alternative glob: "/var/log/**/*.log"
    # Replaces the deprecated `start_at_beginning: true`. The deprecated option
    # also made Vector ignore its saved checkpoints, so every restart re-read
    # and re-shipped every matching file to the sinks. `read_from` still starts
    # new files at their first byte but resumes from the checkpoint after a
    # restart. For a one-off full re-read while testing, temporarily add
    # `ignore_checkpoints: true`.
    read_from: "beginning"

# Transforms: turn the demo syslog lines into structured events.
# Vector Remap Language (VRL) reference: https://vrl.dev
transforms:
  parse_logs:
    type: "remap"
    inputs:
      - "dummy_logs"
    # Overwrite the whole event (`.`) with the fields parsed out of `.message`.
    source: |
      . = parse_syslog!(string!(.message))

# Sinks: where events leave the pipeline.
sinks:
  # Debug output: pretty-printed JSON on stdout.
  print:
    type: "console"
    inputs:
      - "datafile"  # candidates: datafile, parse_logs
    encoding:
      codec: "json"
      json:
        pretty: true

  # Archive the data files to Alibaba Cloud OSS through the aws_s3 sink.
  ossdata:
    type: "aws_s3"
    inputs:
      - "datafile"
    bucket: "BUCKNET_NAME"
    region: "oss-cn-shanghai"
    # Keep the trailing slash; without it the path is concatenated with what
    # follows into one long folder name.
    # Other endpoint forms noted by the author:
    #   "s3://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/BUCKNET_NAME/brde/"
    #   "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/fccdjny123/"
    #   https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/
    #   "BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/"
    endpoint: "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/brdedata/"
    # NOTE(review): placeholders only; avoid committing real keys to this file.
    auth:
      access_key_id: "enter your ACCESS_KEY_id"
      secret_access_key: "enter your ACCESS_KEY"
      # region: "oss-cn-shanghai"
    timezone: "Asia/Shanghai"
    # A machine id can be added to the prefix to separate directories per host.
    key_prefix: "date=%F/%H/%M/"
    # "json" = plain JSON file, "json.gz" = gzip-compressed JSON file.
    filename_extension: "json.gz"
    compression: "gzip"
    content_encoding: "gzip"
    content_type: "application/gzip"
    encoding:
      # raw_message writes only the log file content; json would also include
      # the other metadata fields.
      codec: "raw_message"
    batch:
      # Lower this while testing so the generated objects show up in object
      # storage sooner.
      timeout_secs: 60
    healthcheck:
      enabled: false

  # Ship the same events to OpenObserve over HTTP.
  brdedata_openobserve:
    type: "http"
    inputs:
      - "datafile"
    # `brdedata` is the stream, i.e. what ES calls documents.
    # Alternative: "http://localhost:5080/api/default/default/_json"
    uri: "http://localhost:5080/api/default/brdedata/_json"
    method: "post"
    compression: "gzip"
    auth:
      strategy: "basic"
      user: "wanghao@geetest.com"
      password: "enter your password"
    encoding:
      # raw_message causes problems here: OpenObserve cannot ingest the data.
      codec: "json"
      # timestamp_format: "rfc3339"
    healthcheck:
      enabled: true
# Vector's GraphQL API (disabled by default)
# Uncomment to try it out with the `vector top` command or
# in your browser at http://localhost:8686
# api:
#   enabled: true
#   address: "127.0.0.1:8686" 
[Unit]
Description=Vector
Documentation=https://vector.dev
After=network-online.target
Requires=network-online.target

[Service]
# `validate` is a subcommand that takes the files to check as its own
# positional arguments. A top-level --config placed before the subcommand is
# not what `validate` reads, so the previous form checked the default config
# path instead of this service's file.
# NOTE(review): `validate` also runs sink health checks; add --no-environment
# if start-up must not depend on downstream services being reachable.
ExecStartPre=/usr/bin/vector validate /etc/vector/vector_data.yaml
ExecStart=/usr/bin/vector --config /etc/vector/vector_data.yaml
# Reload: re-validate the config first, then signal the running process.
ExecReload=/usr/bin/vector validate /etc/vector/vector_data.yaml
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
AmbientCapabilities=CAP_NET_BIND_SERVICE
# The leading "-" makes the environment file optional.
EnvironmentFile=-/etc/default/vector
# Since systemd 229, should be in [Unit] but in order to support systemd <229,
# it is also supported to have it here.
StartLimitInterval=10
StartLimitBurst=5

[Install]
WantedBy=multi-user.target

运行日志

vector 运行日志配置文件

#                                    __   __  __
#                                    \ \ / / / /
#                                     \ V / / /
#                                      \_/  \/
#
#                                    V E C T O R
#                                   Configuration
#
# ------------------------------------------------------------------------------
# Website: https://vector.dev
# Docs: https://vector.dev/docs
# Chat: https://chat.vector.dev
# ------------------------------------------------------------------------------

# [sources.my_file_source.multiline]
# start_pattern = '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
# mode = "halt_before"
# condition_pattern = '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
# timeout_ms = 1000

# Change this to use a non-default directory for Vector data storage:
# data_dir: "/var/lib/vector"

# Sources: where events enter the pipeline.
sources:
  # Random syslog-formatted demo logs; only the parse_logs transform reads them.
  dummy_logs:
    type: "demo_logs"
    format: "syslog"
    interval: 1
  # Tails the application run logs.
  logfile:
    type: "file"
    include:
      - "/tmp/tmplog/*.log"  # alternative glob: "/var/log/**/*.log"
    # Replaces the deprecated `start_at_beginning: true`. The deprecated option
    # also made Vector ignore its saved checkpoints, so every restart re-read
    # and re-shipped every matching file to the sinks. `read_from` still starts
    # new files at their first byte but resumes from the checkpoint after a
    # restart. For a one-off full re-read while testing, temporarily add
    # `ignore_checkpoints: true`.
    read_from: "beginning"
    # Merge multi-line entries (e.g. stack traces) into a single event. A new
    # event starts at a line beginning with "[YYYY-MM-DD"; following lines are
    # appended until the next such line, or until no new line has arrived for
    # timeout_ms milliseconds.
    multiline:
      start_pattern: '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
      mode: "halt_before"
      condition_pattern: '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
      timeout_ms: 1000


# Transforms: turn the demo syslog lines into structured events.
# Vector Remap Language (VRL) reference: https://vrl.dev
transforms:
  parse_logs:
    type: "remap"
    inputs:
      - "dummy_logs"
    # Overwrite the whole event (`.`) with the fields parsed out of `.message`.
    source: |
      . = parse_syslog!(string!(.message))

# Sinks: where events leave the pipeline.
sinks:
  # Debug output: pretty-printed JSON on stdout.
  print:
    type: "console"
    inputs:
      - "logfile"  # candidates: logfile, parse_logs
    encoding:
      codec: "json"
      json:
        pretty: true

  # Ship the run logs to OpenObserve over HTTP.
  brdelog_openobserve:
    type: "http"
    inputs:
      - "logfile"
    # `brdelog` is the stream, i.e. what ES calls documents.
    # Alternative: "http://localhost:5080/api/default/default/_json"
    uri: "http://localhost:5080/api/default/brdelog/_json"
    method: "post"
    compression: "gzip"
    # NOTE(review): avoid committing a real password to this file.
    auth:
      strategy: "basic"
      user: "wanghao@geetest.com"
      password: "enter your password"
    encoding:
      codec: "json"
      timestamp_format: "rfc3339"
    healthcheck:
      enabled: true

  # Archive the run logs to Alibaba Cloud OSS through the aws_s3 sink.
  osslog:
    type: "aws_s3"
    inputs:
      - "logfile"
    bucket: "BUCKNET_NAME"
    region: "oss-cn-shanghai"
    # Keep the trailing slash; without it the path is concatenated with what
    # follows into one long folder name.
    # Other endpoint forms noted by the author:
    #   "s3://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/BUCKNET_NAME/brde/"
    #   "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/fccdjny123/"
    #   https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/
    #   "BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/vector_test/"
    endpoint: "https://BUCKNET_NAME.oss-cn-shanghai.aliyuncs.com/brdelog/"
    # NOTE(review): placeholders only; avoid committing real keys to this file.
    auth:
      access_key_id: "enter your ACCESS_KEY_id"
      secret_access_key: "enter your ACCESS_KEY"
      # region: "oss-cn-shanghai"
    encoding:
      codec: "raw_message"
    timezone: "Asia/Shanghai"
    # Alternative prefix: "date=%F/hour=%H/"
    # A machine id can be added to the prefix to separate directories per host.
    key_prefix: "date=%F/%H/%M/"
    batch:
      timeout_secs: 60
    healthcheck:
      enabled: false
# Vector's GraphQL API (disabled by default)
# Uncomment to try it out with the `vector top` command or
# in your browser at http://localhost:8686
# api:
#   enabled: true
#   address: "127.0.0.1:8686" 
[Unit]
Description=Vector
Documentation=https://vector.dev
After=network-online.target
Requires=network-online.target

[Service]
# `validate` is a subcommand that takes the files to check as its own
# positional arguments. A top-level --config placed before the subcommand is
# not what `validate` reads, so the previous form checked the default config
# path instead of this service's file.
# NOTE(review): `validate` also runs sink health checks; add --no-environment
# if start-up must not depend on downstream services being reachable.
ExecStartPre=/usr/bin/vector validate /etc/vector/vector_log.yaml
ExecStart=/usr/bin/vector --config /etc/vector/vector_log.yaml
# Reload: re-validate the config first, then signal the running process.
ExecReload=/usr/bin/vector validate /etc/vector/vector_log.yaml
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
AmbientCapabilities=CAP_NET_BIND_SERVICE
# The leading "-" makes the environment file optional.
EnvironmentFile=-/etc/default/vector
# Since systemd 229, should be in [Unit] but in order to support systemd <229,
# it is also supported to have it here.
StartLimitInterval=10
StartLimitBurst=5

[Install]
WantedBy=multi-user.target

deploy best practice

Create env file

Create systemd service file

cp systemd service file to /usr/lib/systemd/system/ dir.

sudo cp /home/ryefccd/.vector/etc/systemd/vector.service /usr/lib/systemd/system/

Enable service

ryefccd@republic:~/.vector$ ls /lib/systemd/system |grep vector
vector_data.service
vector_log.service

systemctl list-unit-files
systemctl daemon-reload
systemctl list-unit-files

systemctl enable vector_data.service
systemctl enable vector_log.service

运行结果示例:

systemctl daemon-reload
ryefccd@republic:~/.vector$ systemctl enable vector_data.service
Created symlink /etc/systemd/system/multi-user.target.wants/vector_data.service → /lib/systemd/system/vector_data.service.

ryefccd@republic:~/.vector$ systemctl enable vector_log.service
Created symlink /etc/systemd/system/multi-user.target.wants/vector_log.service → /lib/systemd/system/vector_log.service.

start

systemctl start vector_data.service

systemctl start vector_log.service

stop

systemctl stop vector_data.service

systemctl stop vector_log.service

status

systemctl status vector_data.service

systemctl status vector_log.service

Test service

资料

multiline-messages

运行日志中多行日志配置.