imports:
  - name: "Test_Team"
    conf_id: "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
    prefix: test_team
    params:
      ElasticIndexName: TEST_INDEX

# all workflows are defined in this section.
# a workflow consists of labels, outputs and rules.
# an input matches a workflow if there is any intersection between the input's labels and the workflow's input_labels.
# outputs are referred to by their unique name
#
# Note about incoming line/byte metrics: a source can be added to multiple workflows, however
# only the first workflow that a source appears in will be used for incoming line/byte metric related operations.
# Workflows are ordered by name ascendingly and the first workflow is picked for incoming line/byte metric related operations.
# - incoming line/byte metrics are reported to the destinations of the first workflow that the source appears in.
# - incoming line/byte metric based thresholds, which are specified in the thresholds section of the workflow, are honored only for the first workflow that the source appears in.
workflows:
  # This one demonstrates a workflow that applies a generic rule to all sources containing the same label.
  error-anomaly-workflow:
    filters: # filters are applied before the processors
      - error
    input_labels:
      - errorcheck
    processors:
      - error-regex
    destinations:
      - '{{ Env "TEST_SUMO" }}'
      - error-anomaly-slack
      - error-anomaly-moogsoft-webhook
      - pagerduty-integration
  billing-http-requests-workflow:
    input_labels:
      - billing
    processors:
      - http-request-latencies
    thresholds:
      - http-latencyp95-threshold
      - http-avg-threshold
      - incoming-lines-threshold
      - incoming-bytes-threshold
    destinations:
      - '{{ Env "TEST_SUMO" }}'
      - email
  # This workflow applies clustering only to the input sources from our billing application
  billing-cluster-workflow:
    input_labels:
      - billing
    processors:
      - clustering
    destinations:
      - datadog-default
  # Edgedelta agent collects system and docker metrics. This workflow specifies where those raw metrics should be sent.
  # No metric processing is supported at this moment.
  stats-workflow:
    input_labels:
      - system
      - docker
      - agent
      - infa-processes
    destinations:
      - '{{ Env "TEST_SUMO" }}'
  # Edgedelta agent keeps its components' health data including status, success and error counts and their short historic values (e.g. the past 10m).
  health_stats_workflow:
    input_labels:
      - agent-components-health
    destinations:
      - datadog-custom
  heartbeat_metrics_workflow:
    input_labels:
      - agent-heartbeat
    destinations:
      - '{{ Env "TEST_SUMO" }}'
  # the archiving workflow forwards raw logs to the configured archive destinations
  # there can be at most one workflow with one or more archive destinations
  #
  # the `archive_workflow` name was specifically introduced to all base configs in
  # https://github.com/edgedelta/edgedelta/pull/5444
  archive_workflow:
    filters:
      - info
    input_labels:
      - billing
    destinations:
      - my-s3
      - my-gcs
  # Users can define an expiring workflow and set the expiration time in the workflow definition.
  # By using an expiring workflow you can also enable log forwarding until a specific time.
  # Note: the "expires_in" time must be in RFC3339 format.
  log_forward_workflow:
    description: "test for time limit"
    input_labels:
      - system
      - docker
      - agent
      - infa-processes
    filters:
      - info
    destinations:
      - '{{ Env "TEST_SUMO" }}'
    expires_in: 2021-06-01T12:00:00.000Z
  # workflow that can only run on specified hosts.
  enabled_hosts_workflow:
    description: "runs only on specified hosts"
    input_labels:
      - system
      - docker
      - agent
      - infa-processes
    filters:
      - info
    destinations:
      - '{{ Env "TEST_SUMO" }}'
    # workflow only runs if the agent host is one of these hosts.
    enabled_hosts:
      - my.host.us1
      - my.host.us2
  # Conditional workflow triggers when the conditions are met.
  # Supported conditional workflow cases:
  # - on_demand: triggered by an event posted to the /on_demand API
  # - source_version_change: triggered by the ed agent when a change is detected in the source version
  conditional-workflow:
    input_labels:
      - errorcheck
    conditions:
      - alert_on_my_service
    destinations:
      - '{{ Env "TEST_SUMO" }}'

# all inputs are defined in this section
# each one defines a comma separated list of labels
inputs:
  system_stats:
    labels: "system"
    interval: 30s # if there exists more than one interval for stats, the stat beater will fall back to the minimum interval
  container_stats:
    labels: "docker"
  # This input should only be defined when a user wants to forward this data to external destinations. Otherwise, "agent_stats_enabled" under "agent_settings" should be used
  agent_stats:
    labels: "agent"
  # Agent component health can be defined in only one workflow. The destinations which have the "health" feature turned on will receive agent health data as logs
  agent_components_health:
    labels: "agent-components-health"
  # Agent heartbeat can be defined in only one workflow. The destinations which have the "heartbeat" feature turned on will receive agent heartbeat metrics
  agent_heartbeat:
    labels: "agent-heartbeat"
  kubernetes_stats:
    labels: "kubernetes-stats"
  # to be able to use k8s_events, set the env var ED_LEADER_ELECTION_ENABLED="1" in the agent deployment manifest.
  k8s_events:
    labels: "k8s-events"
  process_stats:
    groups:
      - labels: "infa-processes"
        include:
          users:
            - infa
            - pcuser
          cmds:
            - org.apache.catalina.startup.(Bootstrap)
            - /tomcat/temp/([^/\s]+)
            - /home/pcuser/Informatica/9.1.0/server/bin/([^/\s]+)
            - (AdminConsole)
        exclude:
          users:
            - root
          cmds:
            - pmserver
  containers:
    - labels: "nginx,errorcheck"
      include:
        - "image=gitlab/nginx:latest"
    - labels: "billing,billing-ui,errorcheck"
      include:
        - "image=billing-dashboard:.*"
      enable_incoming_line_anomalies: true
    - labels: "errorcheck"
      include:
        - "image=.*"
      # Detects line patterns automatically based on the Ragel FSM based lexical recognition process. No need to specify line_pattern explicitly.
      auto_detect_line_pattern: true
    - labels: "apache-web"
      include:
        - "name=apache*,image=.*latest$"
      exclude:
        - "image=.*nginx.*"
        - "name=.*nginx.*"
      # If a "line_pattern" regex rule is specified in the agent config, the agent splits lines not on newline ("\n") but on this specific line separation rule.
      line_pattern: '^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'
  kubernetes:
    - labels: "nginx,errorcheck"
      include:
        - "pod=^nginx.*$,kind=ReplicaSet,namespace=default,container-name=nginx,container-image=docker.io/nginx:latest,labels.my_app=abc"
      exclude: # exclude has higher priority
        - "namespace=^kube-system$"
    - labels: "apache,errorcheck"
      include:
        - "pod=^apache.*$,namespace=.*web*"
      exclude: # exclude has higher priority
        - "namespace=^kube-nginx$"
        - "pod=.*nginx*,kind=StatefulSet"
    - labels: "k8s with enrichment dynamic fields from labels"
      include:
        - "pod=flog,namespace=default"
      filters:
        - enrichment-failure-behavior
    # it should be defined as 'eks=true'
    - labels: "k8s with enrichment aws metadata"
      include:
        - "pod=flog,namespace=default"
      filters:
        - enrichment-aws
    - labels: "k8s with enrichment GCP metadata"
      include:
        - "pod=flog,namespace=default"
      filters:
        - enrichment-gcp
    - labels: "k8s with cursor tracking"
      enable_persisting_cursor: true
      include:
        - "pod=flog,namespace=default"
    - labels: "k8s with preserve original timestamp"
      preserve_original_timestamp: true
      include:
        - "pod=flog,namespace=default"
    - labels: "k8s with auto log level detector"
      # auto_log_level_detector extracts logLevel info from log lines automatically.
      auto_log_level_detector: true
      include:
        - "pod=flog,namespace=default"
  # when this input is defined, an implicit workflow will be injected into the agent to collect traffic in k8s using eBPF
  # the agent will use the tracer package to listen to communications and produce records
  # these records will be further processed to produce metrics on count and p95 latency of the traffic (ed_k8s_traffic_latency.count and ed_k8s_traffic_latency.p95)
  # the dimensions for these metrics are protocol, role, URI, method, response_code, client_namespace, client_controller, server_namespace, server_controller
  k8s_traffic:
    labels: "k8s-traffic"
    include:
      - "namespace=default"
    exclude: # exclude has higher priority
      - "namespace=^kube-system$"
  files:
    - labels: "billing,errorcheck"
      path: "/billing/logfolder1/*.log"
    - labels: "billing,errorcheck"
      path: "/etc/systemd/system/billingservice/*.log"
      # This is a list of regexes that is run against the glob path's captures; matching files are discarded
      exclude:
        - "/etc/systemd/system/billingservice/test.log"
        - "/etc/systemd/system/billingservice/dev.log"
      # Detects line patterns automatically based on the Ragel FSM based lexical recognition process. No need to specify line_pattern explicitly.
      auto_detect_line_pattern: true
      # the stack trace detector only runs in auto line detection mode
      boost_stacktrace_detection: true
      enable_persisting_cursor: true
      # source filters apply at the source pipe level.
      # All contextual logs and archive logs will be subject to these filters
      # all workflow and rule pipes for this source will get the logs after these filters are applied
      filters:
        - info
        - not_trace
        - mask_card
        - mask_password
    - labels: "docker,my_container"
      path: "/var/lib/docker/my_container/*.log"
      # If you collect the docker container standard output logs in a file with the "JSON File logging driver", you need to define and enable docker_mode.
      docker_mode: true
    - labels: "app,service_a"
      path: "/var/log/service_a.log"
      # If a "line_pattern" regex rule is specified in the agent config, the agent splits lines not on newline ("\n") but on this specific line separation rule.
line_pattern: "^MMM dd, yyyy hh:mm:ss" # option for late log handling in terms of rule metrics (alerting and rule metric .error/.avg/.max/.min create) and cluster patterns/samples # ignore rule metrics/alerting for logs that have timestamp older than 15m from now, report cluster patterns with logs' original timestamp instead of time.Now() late_arrival_handling: rule_metrics: ignore_after: 15m patterns: ignore_after: 4h report_with_original_timestamp: true filters: - source-detection-docker - labels: k8s_log path: /var/logs/anyDir/MyApp/users/MyPodID/transaction.log filters: - enrichment-full # all the files in this globalPath are going to be processed, clustered, populated metrics and streamed individually. - labels: "billing,errorcheck" path: "/billing/logfolder1/*.log" separate_source: true # with this flag all files are working individually. # add ingestion_time to JSON logs - labels: "billing,errorcheck" path: "/billing/logfolder1/*.log" # ingest timestamp if input is JSON format. add_ingestion_time: true skip_ingestion_time_on_failure: true # skip ingestion time when the input is broken or invalid format. winevents: - channel: "Application" labels: "errorcheck" - channel: "Security" labels: "errorcheck" - channel: "System" labels: "errorcheck" - channel: "Setup" labels: "errorcheck" ports: - protocol: tcp port: 514 labels: "syslog,firewall" line_pattern: '^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}' - protocol: tcp port: 24680 labels: "errorcheck" - protocol: udp port: 13579 labels: "errorcheck" - protocol: tcp port: 8080 labels: "syslog,tls,service_a" tls: crt_file: /certs/server-cert.pem key_file: /certs/server-key.pem ca_file: /certs/ca.pem # exec inputs run at specified intervals # if command fails (non-zero exit code) its output will be ignored # on successful executions each line of stdout will be ingested to the system separately. execs: - name: "processes" labels: "top" command: "top" interval: 3m - name: "welcomes" labels: "script" interval: 10s command: "/bin/sh -c" script: | for i in {1..50} do echo "Welcome $i times" done # ed_ports inputs can be used to consume data over tcp or http. # schema: Can be empty or FlattenedObservation. If schema is empty then it is assumed the incoming data is raw text lines. # read_size: Read size is only applicable when otherwise it is not used. # If read_size is not provided for the case then it will be assumed as 1. # read_timeout: Timeout duration for reading from tcp port. It is only applicable for protocol=tcp. ed_ports: - labels: "error-counts-per-node" port: 4545 protocol: http schema: FlattenedObservation - labels: "errorcheck" port: 9000 protocol: tcp read_size: 10000 read_timeout: 30s filters: # There can be multiple source detectors attached to an input. # If one fails (and it is optional) the log will pass the filter and reach for next one. # When one of the source detectors can successfully detect the source (meaning all field mappings are satisfied), # rest of the source detectors (if any) will be skipped. - source-detection-k8s - source-detection-custom - source-detection-attribute - labels: "ed-port-with-auto-detect-line-pattern" port: 5656 protocol: tcp # Detects line patterns automatically based on the Ragel FSM Based Lexical Recognition process. No need to specify line_pattern explicitly. 
    - labels: "ed-port-with-auto-detect-line-pattern"
      port: 5656
      protocol: tcp
      # Detects line patterns automatically based on the Ragel FSM based lexical recognition process. No need to specify line_pattern explicitly.
      auto_detect_line_pattern: true
    - labels: "ed-port-with-given-line-pattern"
      port: 9091
      protocol: http
      # If a "line_pattern" regex rule is specified in the agent config, the agent splits lines not on newline ("\n") but on this specific line separation rule.
      # If line_pattern or auto_detect_line_pattern is set then the ingested raw messages should end with "\n", otherwise the http input does not work correctly.
      line_pattern: '^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'
      # option for late log handling in terms of rule metrics (alerting and rule metric .error/.avg/.max/.min creation) and cluster patterns/samples
      # ignore rule metrics/alerting for logs that have a timestamp older than 10m from now, report cluster patterns with the logs' original timestamp instead of time.Now()
      late_arrival_handling:
        rule_metrics:
          ignore_after: 10m
        patterns:
          ignore_after: 3h
          report_with_original_timestamp: true
    - labels: "ed-port-with-network-interface"
      port: 4545
      protocol: tcp
      # Listen is the network interface to bind to; the default value is "0.0.0.0".
      listen: "127.0.0.1"
    - labels: "ed-port-tcp-with-tls"
      port: 4545
      protocol: tcp
      tls:
        crt_file: /certs/server-cert.pem
        key_file: /certs/server-key.pem
        ca_file: /certs/ca.pem
    - labels: "ed-port-https-with-tls"
      protocol: https
      listen: localhost
      port: 443
      tls:
        crt_file: /certs/server-cert.pem
        key_file: /certs/server-key.pem
        ca_file: /certs/ca.pem
    - labels: "ed-port-with-add-ingestion-time"
      port: 4545
      protocol: tcp
      # ingest timestamp if the input is in JSON format.
      add_ingestion_time: true
      skip_ingestion_time_on_failure: true # skip ingestion time when the input is broken or in an invalid format.
  # demo inputs generate fake data at the given intervals
  # 1ms speed generates roughly 1000 log lines per second
  # a 100ms error interval kicks in a burst of errors with the given error count
  demos:
    - name: "fast_demo"
      labels: "fast"
      speed: "1ms"
      error_interval: "100ms"
      error_count: 20
    - name: "slow_demo"
      labels: "slow"
      speed: "2h"
      error_interval: "2m"
      error_count: 30
  ecs:
    - labels: "errorcheck"
      include:
        - "container-name=myecho,task-family=test-task,task-version=2"
      exclude:
        - "container-name=.*xray.*"
  eventhubs:
    - labels: "errorcheck"
      # The Eventhub connection string can be retrieved from the Azure portal: https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-get-connection-string
      connection_string: "Endpoint=sb://edgedelta.servicebus.windows.net/;EntityPath=insights;SharedAccessKeyName=***;SharedAccessKey==***"
      # Eventhub consumer group.
      consumer_group: "$Default"
      # If partition ids are provided then only those partitions are consumed.
      # Otherwise the Event Processor Host approach is used, which load-balances partitions between consumers.
      # The checkpoint directory is used to persist the last read sequence numbers.
      partition_ids: "0,1,2,3"
      checkpoint_dir: "/var/eventhub-checkpoint/"
      # The storage account is used only when partition_ids is empty. In this mode azure storage is used as the lease & checkpoint backend.
      # Multiple agents communicate via blob objects to load-balance the eventhub partitions evenly and keep track of the last retrieved event offset
      # storage_account_name: "mystorageacc"
      # storage_account_key: "*****"
      # storage_container_name: "edgedelta-eventhub-container-for-test"
  kafkas:
    - labels: "errorcheck"
      # Kafka endpoint is a comma separated list of broker urls
      endpoint: "something"
      # Kafka topic to listen to.
topic: "topic" - labels: "errorcheck-tls" endpoint: "something-tls" topic: "topic" tls: disable_verify: true ca_file: /var/etc/kafka/ca_file ca_path: /var/etc/kafka crt_file: /var/etc/kafka/crt_file key_file: /var/etc/kafka/keyfile key_password: p@ssword123 client_auth_type: noclientcert # possible selections: noclientcert, requestclientcert, requireanyclientcert, verifyclientcertifgiven, requireandverifyclientcert min_version: TLSv1_1 max_version: TLSv1_3 - labels: "my-kafka-events" endpoint: "something" topic: "topic" # Consumer group to isolate the consumption of topic for the agents. All agents sharing same config will be joining same consumer group. group_id: "my-group" sasl: username: kafka_username password: p@ssword123 mechanism: PLAIN # possible selections: PLAIN, SCRAM-SHA-256, SCRAM-SHA-512 filters: - source-detection-custom s3_sqs: - labels: "errorcheck" sqs_url: "https://sqs.us-west-2.amazonaws.com/233765244907/taylan-test-sqs" # access credentials must have following permissions for following actions: # sqs:DeleteMessage, s3:GetObject, sqs:DeleteMessageBatch, sqs:ReceiveMessage access_key_id: "ABCDEFG" access_secret: "Dn2djaskl" # region where the bucket and sqs queue located region: "us-west-2" - labels: "alb" sqs_url: "https://sqs.us-west-2.amazonaws.com/233765244907/my-alb-logs-sqs" access_key_id: "ABCDEFG" access_secret: "Dn2djaskl" region: "us-west-2" # supported log types for s3: # - alb: Application load balancer logs are gzip files. Each line contains raw access log message. # - cloudtrail: Cloud trail logs are gzip files. Each file contains a json object which has multiple records inside. log_type: alb - labels: "sqs-inout-assumes-role" sqs_url: "https://sqs.us-west-2.amazonaws.com/233765244907/taylan-test-sqs" region: "us-west-2" # role_arn is used for assuming an iam role. To see how it works ref: https://docs.aws.amazon.com/IAM/latest/UserGuide/tutorial_cross-account-with-roles.html role_arn: "arn:aws:iam:::role/" # external_id increases the security of the role by requiring an optional external identifier, which prevents "confused deputy" attacks. external_id: "053cf606-8e80-47bf-b849-8cd1cc826cfc" cloudwatches: - labels: "us-west-2_ed-log-group_admin-api" # region supports regex, all regions in us. region: "^us.*$" # log_group supports regex expression and if it is not provided means get all log groups. # log groups starting with /ed-log-group log_group: "^/ed-log-group" # log_stream supports regex expression and if it is not provided means get all log streams. log_stream: "^log.*$" # lookback is used for how long ago to monitor log events. Default is 1 hour. lookback: 1h # interval is used for polling frequency to look new coming log events. Default is 1 minute. interval: 1m # prepend_timestamp is used to add event timestamp as a prefix of event message with a tab("\t") delimiter. prepend_timestamp: true # The maximum number of log events returned. # Default the maximum is as many log events as can fit in a response size of 1 MB, up to 10,000 log events. result_limit: 5000 - labels: "us_ed-log-group_admin" region: "^us.*$" log_group: "/ed-log-group" log_stream: "^admin.*$" interval: 5m - labels: "ed-log-group" # all regions. region: ".*" log_group: "/ed-log-group" # all streams. log_stream: ".*" interval: 5m - labels: "ed-log-with-regex-group-name" # all regions. region: ".*" # log groups starting with /ed-log log_group: "^/ed-log" # all streams. 
log_stream: ".*" interval: 5m - labels: "cloudwatch-input-assumes-role" region: "us-west-2" log_group: "/ed-log-group" # all streams. log_stream: ".*" interval: 5m # role_arn is used for assuming an iam role. To see how it works ref: https://docs.aws.amazon.com/IAM/latest/UserGuide/tutorial_cross-account-with-roles.html role_arn: "arn:aws:iam:::role/" # external_id increases the security of the role by requiring an optional external identifier, which prevents "confused deputy" attacks. external_id: "053cf606-8e80-47bf-b849-8cd1cc826cfc" - labels: "cloudwatch-input-host-filtered" # If host is set then cloudwatch will be tailed by the agent on given host. This might be useful to avoid duplicate consumption if you have more than one agent running this config host: "myhost" # all regions. region: ".*" # log groups starting with /ed-log log_group: "^/ed-log" # all streams. log_stream: ".*" interval: 5m - labels: "cloudwatch-input-with-aws-creds" # all regions. region: ".*" # log groups starting with /ed-log log_group: "^/ed-log" # all streams. log_stream: ".*" interval: 5m aws_key_id: '{{ Env "AWS_KEY_ID" }}' aws_sec_key: '{{ Env "AWS_SECRET_KEY" }}' - labels: "cloudwatch-input-with-rate-limiters" region: ".*" log_group: "^/ed-log" log_stream: ".*" interval: 5m # According to AWS doc here are the limits: # DescribeLogGroups: 5TPS, DescribeLogStreams 5TPS, GetLogEvents 25 TPS # ref: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/cloudwatch_limits_cwl.html rate_limiters: - api_name: "DescribeLogGroups" every: 1s limit: 4 - api_name: "DescribeLogStreams" every: 1s limit: 4 - api_name: "GetLogEvents" every: 1s limit: 22 pubsubs: - labels: "google_pubsub_with_key" project_id: "projectID1" sub_id: "subID1" key: "{\"pubsub_key\": \"key_123\"}" - labels: "google_pubsub_with_key_path" project_id: "projectID2" sub_id: "subID2" key_path: "pubsub_key_path" otlp: - labels: "my-otlp-traces,error" port: 8585 data_type: "trace" # possible types are "trace", "log" and "metric", only one should be selected nats: - labels: "my-nats-normal" # Input mode is either "normal", which is for normal tailing with only subject needed, # or "distributed" where subject prefix, agent_index (or agent_index_descriptor), total_agent_count and total_subject_count is needed for consuming multiple sources input_mode: "normal" # Consumer mode can be either "pull" or "push" which corresponds to pull based and push based consumer # One can refer to NATS documentation for more info consumer_mode: "pull" # Cluster URL is where NATS cluster endpoint is cluster_url: "nats://localhost:4222" # Stream Name is the name of previously designated NATS JetStream stream that this input will consume from stream_name: "example-stream" # Subject is the channel that this input will consume from subject: "example-subject-1" # Timeout is the timeout duration for the request that will be done against NATS cluster, default is 10s timeout: 1m # Ack Wait Duration is the duration that a delivered message might remain unacknowledged before redelivery is attempted, default is 5s ack_wait_duration: 10s - labels: "my-nats-distributed" input_mode: "distributed" consumer_mode: "push" cluster_url: "nats://localhost:4222" stream_name: "example-stream" # Subject Prefix is the prefix for channel(s) that this input will consume from and these channel(s) are calculated # with the help of agent_index (or agent_index_descriptor), total_agent_count and total_subject_count subject_prefix: "example-subject" # Total agent count is the number of agents that will be 
      # listening to this stream; it is used for calculating which subjects this input will listen to.
      # The agent name should be set via the 'ED_NATS_AGENT_NAME' environment variable for this calculation to work
      total_agent_count: 5
      # Total subject count is the number of subjects created in advance; this number should be fixed
      total_subject_count: 10
      # should_split_lines makes this input split the obtained payload into separate lines on the newline character
      should_split_lines: true
      timeout: 1m
      # disable_acks makes this input not acknowledge any message after consuming, default is false
      disable_acks: true
  # The fluentd input can consume data sent from a fluentd agent which is configured to output using the fluent-forward protocol.
  # It can accept both secure and insecure connections.
  # example conf from fluentd.conf:
  # <match **>
  #   # https://docs.fluentd.org/v1.0/articles/in_forward
  #   @type forward
  #   heartbeat_type none
  #   keepalive true
  #   <security>
  #     shared_key "sharedKey" # shared key is used between nodes.
  #   </security>
  #   <server>
  #     host <ip-or-host> # IP or host of an endpoint which distributes the requests to the ED Agents.
  #     port 3421         # Port on which the ED Agents listen for fluentd input.
  #   </server>
  # </match>
  fluentds:
    - labels: my-fluentd-collector
      port: 9898
      read_timeout: 60s
      shared_key: sharedKey
      tls:
        ca_file: /var/run/secrets/secure-forward/ca-bundle.crt
        crt_file: /var/run/secrets/secure-forward/tls.crt
        key_file: /var/run/secrets/secure-forward/tls.key
        disable_verify: true

# filters do the filtering before processors handle their jobs.
# their names should be unique within the filters section
filters:
  - name: error
    type: regex # type declaration is optional for regex filters because the default type is regex.
    pattern: "error|ERROR|problem|ERR|Err"
  - name: info
    pattern: "info|INFO"
  - name: warn
    pattern: "warn|WARN"
  - name: error_or_info
    pattern: "error|info"
  - name: not_trace
    pattern: "TRACE"
    negate: true
  - name: appinsight_trace_filter
    type: buffered-trace
    # trace_id_pattern must be a regex with a single capture group.
    trace_id_pattern: "\"operation_Id\": \"(?P<id>\\w+)\""
    # failure_pattern is the regex pattern used to determine whether the trace events should be considered a failure.
    # all failed traces pass the filter.
    failure_pattern: \"status\":\"Failed\"
    # latency_pattern is an optional regex pattern which extracts the latency value from trace events.
    latency_pattern: "\"latency\": \"(?P<latency>\\d+)\""
    # all high latency traces exceeding the given value pass the filter.
    latency_threshold: 500.0
    # success_sample_rate is a floating number between 0 and 1. Default is 0.0.
    success_sample_rate: 0.2
    # trace_deadline is the deadline to wait after the last event of a trace. Once the deadline is reached, filtering/sampling will be applied to the events of the trace
    trace_deadline: 1m
  - name: mask_card
    type: mask
    # predefined data types (credit_card, us_phone_dash) can be used by only specifying their names.
    predefined_pattern: credit_card
  - name: mask_phone
    type: mask
    predefined_pattern: us_phone_dash
    mask: 'XXXXX'
  - name: mask_password
    type: mask
    # rules with user given names that match custom patterns require a pattern regex.
    pattern: 'password:\s*(?P<pw>\w+)'
    # The captured group names in the regex pattern will be replaced with the corresponding value specified in mask_captured.
    # If 'mask' is defined, this feature will be suppressed by it.
    mask_captured:
      pw: '******'
  - name: mask_email
    type: mask
    pattern: 'email:\s*(?P<email>\w+)'
    mask_captured:
      email: '******'
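  # Illustrative example (not part of the original config; the sample line is made up): with the mask filters above,
  # a log line such as
  #   user=jane password: hunter2 email: jane
  # would be forwarded as
  #   user=jane password: ****** email: ******
  # because the captured "pw" and "email" groups are replaced by the values given in mask_captured.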
"log.message"). Its value will be extracted and the original json content will be discarded field_path: "severity" # keep_log_if_failed is useful when filter processes mixed json and non-json logs and non-json logs should be kept and ingested as original keep_log_if_failed: true - name: extract_first_data type: extract-json-field # Field path can support indexing extraction as well field_path: "records[0].data" - name: extract_all_data type: extract-json-field # field path can support fetching all indexes using [*]. In that case all the items of array will be ingested into agent as separate log field_path: "records[*].data" - name: drop_some_fields type: drop-json-fields # Each field path is a dot separated path of the field (i.e. "log.source"). The fields will be removed from the original content and not processed. field_paths: - "level" - "details" - "log.source" # Custom attribute filter do all the log filtering with given key-value of the attribute - name: custom_attributes_filter type: custom-attributes key: service value: billing - name: negate_custom_attributes_filter type: custom-attributes key: component # A comma separated values to match. If any of them matches the given attribute's value then the log will be pass through value: credithandler,debithandler # Negate is also supported for attribute filter negate: true # Filtering custom attributes also support regex matching - name: regex_custom_attributes_filter type: custom-attributes key: level pattern: "error|ERROR|problem|ERR|Err" # Filter combination allows grouping filters using and/or operators. Only 1 operator can be used per combination - name: combination_filter_or type: combination operator: or # filters being listed in a combination can be name of another filter or an in-line filter filters_list: - filter_name: error - filter_name: custom_attributes_filter - name: negate_combination_filter_and type: combination operator: and filters_list: - pattern: "debug|DEBUG" negate: true # A combination filter cannot be defined as an in-line filter # To use nested combination filter, define the inner filter before this and mention it using filter_name - filter_name: negate_custom_attributes_filter # filter combination also support negating negate: true - name: elastic_apm_trace_filter type: buffered-elastic-apm-processor # payload_separator distinguishes newline seperated elastic APM logs from each other such as # ...... payload_separator: "-----------------" # enabled_types is the comma-separated list of enabled trace types: "transaction", "span", "error" and "metricset" # default is "transaction,span,error,metricset" # types outside of enabled types will be discarded during tracing processing enabled_types: "transaction,span,error,metricset" # optimize_types is the comma-separated list of trace types that will be optimized: "transaction", "span", "error" and "metricset" # default is "transaction,span,error,metricset" # types outside of enabled types will be discarded during tracing processing # optimized types will be omitted everytime except failure, high latency or success sampling mode and other types will be flushed instead optimize_types: "transaction,span" # trace_deadline is the deadline to wait after last event for a trace. 
    # Once the deadline is reached, filtering/sampling will be applied to the events of the trace
    # default is 30s
    trace_deadline: 1m
    # should_filter_traces enables filtering traces based on failures, high latencies and sampling rate
    should_filter_traces: true
    # failure_path is the path where the failure info is obtained from; the obtained value is always cast into a string
    failure_path: "transaction.result"
    # failure_value_pattern is matched against the value obtained from failure_path
    failure_value_pattern: "HTTP (4|5)xx"
    # transaction_latency_path overrides the latency path for transactions, default is "transaction.duration"
    transaction_latency_path: "transaction.duration"
    # span_latency_path overrides the latency path for spans, default is "span.duration"
    span_latency_path: "span.duration"
    # all high latency traces exceeding the given value pass the filter
    latency_threshold: 35.5
    # success_sample_rate is a floating number between 0 and 1. Default is 0.0.
    success_sample_rate: 0.2
  - name: opentelemetry_trace_filter
    type: buffered-otlp-trace-processor
    # trace_deadline is the deadline to wait after the last event of a trace. Once the deadline is reached, filtering/sampling will be applied to the events of the trace
    # default is 30s
    trace_deadline: 1m
    # should_filter_traces enables filtering traces based on failures, high latencies and sampling rate
    should_filter_traces: true
    # failure_path is the path where the failure info is obtained from; the obtained value is always cast into a string
    # It should be either in "name" or "attributes.<attribute_name>" format, where "<attribute_name>" can be any attribute name of a span
    failure_path: "attributes.result_code"
    # failure_value_pattern is matched against the value obtained from failure_path
    failure_value_pattern: "(4|5)xx"
    # all high latency traces exceeding the given value pass the filter
    latency_threshold: 2500.0
    # success_sample_rate is a floating number between 0 and 1. Default is 0.0.
    success_sample_rate: 0.1
  # base64-decode is a filter that matches and decodes logs that are in base64 encoded format. Logs that don't match the base64 format will be dropped
  - name: base64_decoder
    type: base64-decode
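  # Illustrative example (not part of the original config): the base64_decoder filter above would turn an ingested
  # line "aGVsbG8gd29ybGQ=" into "hello world"; lines that are not valid base64 are dropped by this filter.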
  # split-with-delimiter is a filter that matches and splits a single log into multiple logs using a specified delimiting character. Default is the newline character unless specified with the field "delimiter"
  # Line splitting is already handled at the input level using the newline delimiter. This filter is useful in cases where further splitting is needed.
  - name: split_logs_using_specified_delimiter
    type: split-with-delimiter
    delimiter: ","
  - name: source-detection-k8s
    type: source-detection
    source_type: "K8s" # Possible source types: "Docker", "ECS", "File", "K8s" and "Custom" (for Custom, one can define as many field mappings as desired)
    # When optional is set to "true", if an acquired log is not processed properly, this log will not be ignored (ignoring is the default behavior) and will be ingested with the original
    # (in this case, port) source definition
    optional: true
    # There exist three processing modes: json (default), regex and attribute
    # For json, the user should input a JSON path as the value of each field mapping
    # For regex, the user should input a regex pattern with exactly one capturing group named "field", e.g. "path (?P<field>\w+)"
    # For attribute, the user should input the key of the attribute that will be extracted from the source, which can be registered by enrichments
    processing_mode: json
    # Field mappings define how we can extract certain information from the obtained input
    # Key is the label that we decorate the log with, with respect to the given source type. We will match the given key with our internal
    # representation for this decoration purpose. Value corresponds to the JSON path where one can find the field related to the key.
    # Possible mappings for each source type:
    # Docker:
    # - docker_container_id, docker_container_image (Mandatory)
    # - docker_container_name, docker_image_name (Optional) (Not defining them can cause loss of information)
    # ECS:
    # - ecs_container_id, ecs_container_image, ecs_container_name (Mandatory)
    # - ecs_cluster, ecs_container, ecs_task_family, ecs_task_version (Optional) (Not defining them can cause loss of information)
    # File:
    # - file_path, file_glob_path (Mandatory)
    # K8s:
    # - k8s_namespace, k8s_pod_name, k8s_container_name, k8s_container_image (Mandatory)
    # - k8s_logfile_path, k8s_controller_kind, k8s_controller_name, k8s_controller_logical_name, k8s_pod_id, k8s_docker_id (Optional) (Not defining them can cause loss of information)
    # - k8s_short_container_image is a shortened version of the k8s_container_image tag (e.g. alpine:3.1.11 -> alpine, gcr.io/edgedelta/agent/v0.1.21 -> agent). This is a hidden tag that can only be retrieved using dynamic enrichment
    field_mappings:
      k8s_namespace: "kubernetes.namespace"
      k8s_pod_name: "-" # Instead of defining a path, one can simply use a dash to skip a field to decrease cardinality
      k8s_container_name: "kubernetes.container.name"
      k8s_container_image: "kubernetes.container.image"
      k8s_controller_logical_name: "kubernetes.controller.name"
  - name: source-detection-docker
    type: source-detection
    source_type: "Docker"
    optional: false
    field_mappings:
      docker_container_id: "docker.id"
      docker_container_image: "docker.image"
  - name: source-detection-custom
    type: source-detection
    source_type: "Custom"
    optional: false
    processing_mode: regex
    field_mappings:
      namespace: namespace (?P<field>\w+)
      serviceName: service (?P<field>\w+)
      roleName: user_role (?P<field>\w+)
      systemType: system (?P<field>\w+)
  - name: source-detection-attribute
    type: source-detection
    source_type: "Custom"
    optional: false
    processing_mode: attribute
    field_mappings:
      namespace: namespace
      serviceName: service
      roleName: role
      systemType: system
  - name: enrichment-full
    type: enrichment
    # from_path is used to enrich data with fields extracted from the path.
    from_path:
      field_mappings:
        - field_name: application
          # pattern must be a capture pattern with only one capture group.
          pattern: /var/logs/anyDir/(?:(.+)/)?users/.*
    # from_k8s is used to enrich data with k8s attributes while the data is being streamed.
    # currently, "pod, namespace, node, controllerName, controllerLogicalName and controllerKind" can be used.
    from_k8s:
      pod_identifier_pattern: /var/logs/anyDir/MyApp/users/(?:(.+)/)/.*
      field_mappings:
        - field_name: instance_id
          pod_attribute: pod
          transformers:
            # replace all "source" matches with "target"
            - source: "-"
              target: "_"
              type: "replace"
            # remove all "test" words
            - source: "test*"
              target: ""
              type: "regex"
        - field_name: namespace
          pod_attribute: namespace
        # fields from labels should have pod_attribute start with "labels."
        - field_name: service
          pod_attribute: labels.service
    # dynamic fields are populated from other fields. value should be in text template format.
    # The fields used in the template must be defined in from_path or from_k8s.
    dynamic:
      field_mappings:
        - field_name: tag
          value: "tail.{{.application}}.{{.service}}"
        # dynamic fields can be derived from other dynamic fields.
        # Dependent fields should be ordered in dependency order: if field2 depends on field1 then field1 must be defined before field2.
        - field_name: version
          value: "v.0.1.13.{{.tag}}"
        # You can define static fields, and dynamic fields can be derived from static fields.
        - field_name: static_field
          value: "static_value"
        - field_name: derived_from_static_field
          value: "derived_from_static.{{.static_field}}"
    # from_logs is used to enrich data with fields extracted from logs
    from_logs:
      field_mappings:
        - field_name: podname
          # pattern must be a capture pattern with only one capture group.
          pattern: "podname: (\\w+)"
        - field_name: component
          # extracting using json_path is also supported
          json_path: fields.[1].component
          # fallback_value is applied as the value instead if the enrichment fails
          fallback_value: servicehandler
  - name: enrichment-failure-behavior
    type: enrichment
    # the failure_behavior field configures what happens to the enrichment upon failure. Supported options are skip_failing_fields|stop_processing|drop_source. Default if not set is skip_failing_fields
    # the drop_source failure behavior cannot be applied for from_logs enrichment
    failure_behavior: stop_enrichment
    dynamic:
      field_mappings:
        - field_name: "service"
          value: '{{".labels.service"}}'
        - field_name: "source"
          value: '.annotations.kubernetes.io/{{.container_name}}.logs'
          # json_path in dynamic enrichment is applied to the object extracted by the text template from the value field above
          json_path: "[0].source"
          # fallback_value is the next value to be applied if the first one fails to apply. Exclusive to dynamic enrichment only
          fallback_value: '{{".short_container_image"}}'
        # templates are also applicable for field_name
        # in case of complicated templates (if/else, range statements, etc...) "[[" and "]]" are needed as delimiters for these statements
        - field_name: '[[if eq .controllerKind "replicaset"]]kube_deployment[[else]]kube_[[.controllerKind]][[end]]'
          value: "{{.controllerName}}"
  - name: enrichment-aws
    type: enrichment
    dynamic:
      field_mappings:
        # if the field value starts with "aws-instance" then the instance metadata is fetched from the aws ec2 instance.
        # for more info ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        - field_name: "instance_id"
          value: '{{".aws-instance.instance-id"}}'
        - field_name: "instance_type"
          value: '{{".aws-instance.instance-type"}}'
        - field_name: "cluster_name"
          value: '{{".aws-instance.cluster-name"}}'
        - field_name: "ec2launchtemplate_id"
          value: '{{".aws-instance.ec2launchtemplate-id"}}'
        - field_name: "ec2launchtemplate_version"
          value: '{{".aws-instance.ec2launchtemplate-version"}}'
        - field_name: "inspector_enabled"
          value: '{{".aws-instance.inspector-enabled"}}'
        - field_name: "cluster_autoscaler_enabled"
          value: '{{".aws-instance.cluster-autoscaler-enabled"}}'
        - field_name: "autoscaling_groupName"
          value: '{{".aws-instance.autoscaling-groupName"}}'
        - field_name: "nodegroup_name"
          value: '{{".aws-instance.nodegroup-name"}}'
        - field_name: "ec2_fleet_id"
          value: '{{".aws-instance.ec2-fleet-id"}}'
  - name: enrichment-gcp
    type: enrichment
    dynamic:
      field_mappings:
        # if the field value starts with "gcp" then the instance metadata is fetched from the GCP instance.
        # for more info ref: https://cloud.google.com/compute/docs/metadata/querying-metadata
        - field_name: "project_id"
          value: '{{".gcp.project.project-id"}}'
        - field_name: "hostname"
          value: '{{".gcp.instance.hostname"}}'
        - field_name: "zone"
          value: '{{".gcp.instance.zone"}}'
        - field_name: "instance_id"
          value: '{{".gcp.instance.id"}}'
        - field_name: "instance_name"
          value: '{{".gcp.instance.name"}}'
        - field_name: "instance_tags"
          value: '{{".gcp.instance.tags"}}'
        - field_name: "cluster_name"
          value: '{{".gcp.instance.attributes.cluster-name"}}'
        - field_name: "gcp_image_tag"
          value: '{{".gcp.instance.image"}}'
        - field_name: "gcp_dev_name"
          value: '{{".gcp.instance.disks.0.device-name"}}'
  - name: warn_error_detection
    type: log-level-detector
    pattern: WARN|ERROR
    # fallback_value can be used to set the default log level attribute if the log-level-detector is unable to fetch the log level using the given pattern
    fallback_value: INFO
  # a capture group can also be used to fetch the log level from the log. The regex pattern used in this filter can only support a single unnamed capture group
  - name: capture_group_log_level_detection
    type: log-level-detector
    pattern: " level=(\\w+) "
    # the field_name field sets the name of the field for this tag. Default is "level"
    field_name: log_level
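  # Illustrative example (not part of the original config; the sample line is made up): with the capture group pattern
  # above, a line such as
  #   2023-04-01T10:00:00Z level=warn disk usage above 90%
  # would be tagged with log_level=warn; a line without a " level=" token is left untagged here because no
  # fallback_value is set for this filter.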
Default is "level" field_name: log_level # log-transformer-javascript is a filter that use Goja (a type of script in JavaScript) for transforming and manipulating the log message # the raw log variable must be named "log" in the script # the raw log will be passed in the script before running and exported after script is done running # therefore in the script there is no need to define the log or return it # Example log that following script transform: # {"app": "alpine", "service": "nonprod", "source": "tomcat"} # -> {"app": "alpine", service": "nonprod", "source": "tomcat", "version": "latest"} - name: add_json_field type: log-transformer-javascript script: | var obj = JSON.parse(log); obj.version = "latest"; log = JSON.stringify(obj); # "keep_log_if_failed" field is intended to replace "required" field, with the default value is false # Starting from agent v0.1.51, default behavior for all script-based filters and gzip-decompressor filters will be to drop/ignore logs if the filter fails to execute the script/decompression # this field is applicable for log-transformer-javascript, enrichment-using-javascript, log-filter-javascript and gzip-decompressor to replace "required" keep_log_if_failed: true # enrichment-using-javascript is another filter that use Goja for script-based enrichment # the raw log variable must be named "log" in the script # the existing attribute tags must be named "tags" in the script # the new attribute tags must be named "newTags" in the script # the raw log and existing attribute tags will be passed in the script before running # the new attribute tags will be exported from the script after script is done running # and these new tags will be added to the existing attribute tags # Note: The new tags will not override existing tags - only new key-value pairs will be added # Example log and attribute that the following script enriches: # Log: "{"app": "alpine", "service": "nonprod", "source": "tomcat"}"; Tags: {"namespace_name": "namespace-1"; "image_name": "image-1"} # New tags after script done running: {"service": "nonprod", "kube_namespace": "namespace-1"} # Source tags are as follow after script done running: {"namespace_name": "namespace-1"; "image_name": "image-1", "kube_namespace": "namespace-1", "service": "nonprod"} # Note: Enrichment is only ran in the first minute since it processes the first log - name: enrichment_script type: enrichment-using-javascript script: | let newTags = {}; var obj = JSON.parse(log); newTags.service = obj.service; newTags.kube_namespace = namespace_name; keep_log_if_failed: true # log-filter-javascript is a script-based filter in Javascript (Goja) # the raw log variable must be named "log" in the script # the boolean to decide whether to drop or pass the log must be named "pass" in the script. # if "pass" is true, then log is passed through, otherwise it is dropped at this filter # Example log filtering script and result are following: # Log: "{"app": "alpine", "service": "prod"}" # Script will return pass = true with above log # Log: "{"app": "alpine", "service": "nonprod"}" # Script will return pass = false with above log - name: log_filter_script type: log-filter-javascript script: | var obj = JSON.parse(log); var pass = false; if (obj.service === "prod") { pass = true; } keep_log_if_failed: true # gzip-decompressor is a filter that match and decompress gzipped log. 
  # Logs that don't match can pass through the filter by default
  - name: gzip_decompression
    type: gzip-decompressor
    keep_log_if_failed: true
  # timestamp extract filters are used to extract the timestamp part from raw logs.
  # They are only applicable at the source level, not the workflow level. Please find the different kinds of filters below:
  - name: regex_timestamp_detection
    type: timestamp
    pattern: (?:^([^\s]+))
    # format should be in GoLang DateTime format (ref: https://pkg.go.dev/time#pkg-constants)
    format: 2006-01-02T15:04:05.000Z
  - name: timestamp_extract_from_json
    type: timestamp
    from_json:
      - path: msg.time
        # format should be in GoLang DateTime format (ref: https://pkg.go.dev/time#pkg-constants)
        format: 2006-01-02T15:04:05.000Z

# all monitoring rules are defined in this section.
# their names should be unique within the rules section
processors:
  cluster:
    name: clustering
    # clustering reports the top N and bottom N clusters. N = num_of_clusters
    num_of_clusters: 100
    # keep the last 20 messages of each cluster
    samples_per_cluster: 20
    # report cluster samples every 30 seconds
    reporting_frequency: 30s
    # default retention is 1h for clusters.
    # if it is set to 10 minutes for example, clusters that don't have any new logs for the last 10 minutes will be dropped.
    retention: 10m
    # enables the cpu aware rate limiter which throttles logs when agent cpu exceeds the soft_cpu_limit defined in the edsettings section.
    # by default this is disabled and a static rate limiter is enabled which allows at most 200 logs per second from a single source.
    # if you want to maximize the sample size of edgedelta clustering then turn on cpu_friendly and set soft_cpu_limit to a few percent less than the desired agent CPU usage.
    cpu_friendly: true
    # Puts a hard limit on how many logs should be clustered per second from a single source. If cpu_friendly is enabled then this will be ignored.
    throttle_limit_per_sec: 200
    # Enable include_pattern_info_in_samples to include pattern info (pattern, pattern count, sentiment score) in the cluster sample as tags. Default is false
    include_pattern_info_in_samples: true
  regexes:
    # supported trigger thresholds: anomaly_probability_percentage, upper_limit_per_interval, lower_limit_per_interval, consecutive
    # ---------------- Simple Keyword Match Processor ----------------
    - name: "error-regex"
      pattern: "error|ERROR|problem|ERR|Err"
      interval: 1m
      retention: 4h
      anomaly_confidence_period: 1h
      anomaly_tolerance: 0.2
      only_report_nonzeros: true
      description: "Counts of messages including error per 2 minutes."
      trigger_thresholds:
        anomaly_probability_percentage: 90 # hits this condition if anomaly score > 90
        upper_limit_per_interval: 250 # hits this condition if matched event count > 250 for the last recording interval (10s).
        consecutive: 5 # if any of the other threshold conditions are hit 5 times in a row then an alert is triggered. Default is 0, so any condition hit would cause an alert
      disable_reporting_in_prometheus: true # when set to true, this regex processor will not expose its metrics in Prometheus format when rule_metrics_prom_stats_enabled is enabled
      # this value is only respected when an aggregator is present.
      # setting this to true will cause generated metrics to be sent from the processor.
      # this can be used to keep pod level information in generated metrics when an aggregator is present.
      granular_metrics: false
    - name: "severity_high"
      # counts all logs whose severity matches "HIGH|high".
      pattern: "HIGH|high"
      filters:
        - extract_severity
      # Notice that there is a field extractor filter specified. This regex rule works on the severity field values, not the whole json log content.
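    # Illustrative example (not part of the original config; the sample log is made up): for a JSON log such as
    # {"msg":"disk full","severity":"HIGH"}, the extract_severity filter reduces the event to the value "HIGH",
    # which this pattern then matches, so the log is counted toward this processor's occurrence stats (count/anomaly scores).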
    # ---------------- Dimension Counter Processor Examples ----------------
    # if the named captures in the regex pattern are dimensions then occurrence stats will be generated.
    # in this example an occurrence count for each HTTP method will be generated:
    # http_method_get.count, http_method_post.count...
    - name: "http-method"
      pattern: "] \"(?P<method>\\w+)"
      dimensions: ["method"]
      # enabled_stats can be used to specify the stats generated from a regex rule.
      # defaults:
      #   count, anomaly1 and anomaly2 metrics are generated for occurrence captures
      #   count, min, max, avg, anomaly1 and anomaly2 metrics are generated for numeric captures
      # special stat types:
      #   anomalymin: takes the min of anomaly1 and anomaly2. useful to reduce alert noise
      enabled_stats: ["count", "anomalymin"]
      trigger_thresholds:
        lower_limit_per_interval: 2 # triggers if matched event count < 2 for the last recording interval (10s).
      filters:
        - info
    # This is another dimension counter with dimensions_as_attributes: true.
    # The metrics generated from this processor have the same name but different attribute values. The dimension key/value is sent as an attribute.
    # By default, there is only a single attribute per metric. To group up multiple attributes use "dimensions_groups"
    # Sample generated metrics are the following:
    # http.count 1 {method="get"}
    # http.anomaly1 25 {method="get"}
    # http.count 1 {method="post"}
    # http.anomaly1 25 {method="post"}
    # http.count 2 {httpversion="1.1"}
    # http.anomaly1 25 {httpversion="1.1"}
    # http.count 2 {code="200"}
    # http.anomaly1 25 {code="200"}
    - name: "http-single"
      pattern: "] \"(?P<method>\\w+) (?P<path>\\S*) (?P<httpversion>\\S*)\" (?P<code>\\d+)"
      dimensions: ["method", "httpversion", "code"]
      dimensions_as_attributes: true
    # An example of a dimension counter with the use of dimensions_groups to group up attributes for metrics
    # dimensions_as_attributes must be enabled in order to use dimensions_groups
    # custom_suffix can be used to customize the metric name generated from a dimensions_group
    # There can be a single or multiple dimensions groups
    # Sample generated metrics are the following:
    # http-group.count 1 {method="get", code="200"}
    # http-group.anomaly1 25 {method="get", code="200"}
    # http-group.count 1 {method="post", code="200"}
    # http-group.anomaly1 25 {method="post", code="200"}
    # http-group_by_version.count 1 {method="get", httpversion="1.1"}
    # http-group_by_version.anomaly1 25 {method="get", httpversion="1.1"}
    # http-group_by_version.count 1 {method="post", httpversion="1.1"}
    # http-group_by_version.anomaly1 25 {method="post", httpversion="1.1"}
    # Note: dimension values must not contain regex special characters such as "|", ".*" etc. for the grouped dimension regex processor to work in the aggregator agent
    # this is a limitation due to reverse extraction of dimension values from the metric name's string representation in the aggregator agent, e.g.
    # http_group_method_get_code_200
    - name: "http-group"
      pattern: "] \"(?P<method>\\w+) (?P<httpversion>\\S*)\" (?P<code>\\d+)"
      dimensions: ["method", "httpversion", "code"]
      dimensions_as_attributes: true
      dimensions_groups:
        - selected_dimensions: ["method", "code"]
        - selected_dimensions: ["method", "httpversion"]
          custom_suffix: "by_version"
    # An example of a dimension numeric capture with the use of dimensions_groups to group up attributes for metrics
    # The numeric dimension must be specified and there can only be one of them per regex processor
    # Sample log: 2022-08-20 08:21:14.288134 response=201 loglevel=INFO ostype=Unix service=one-packaging-ui source=syslog-test duration=41 svcTime=59128524
    # Sample metrics generated from the following processor:
    # apidata_duration.avg 41 {"service":"one-packaging-ui source=syslog-test", "ostype":"Unix"}
    # with custom_suffix being used the metric will look like the following:
    # apidata_duration_test_suffix.avg 41 {"service":"one-packaging-ui source=syslog-test", "ostype":"Unix"}
    - name: apidata
      pattern: ostype=(?P<ostype>\w+).+?service=(?P<service>.+?)\sduration=(?P<duration>\d+)
      dimensions: ['ostype','service']
      numeric_dimension: "duration"
      dimensions_as_attributes: true
      enabled_stats: ["avg"]
      dimensions_groups:
        - selected_dimensions: ["service","ostype"]
          custom_suffix: "test_suffix"
    # ---------------- Dimension Numeric Capture Processor Example ----------------
    # if both dimension and numeric captures are defined in the regex pattern then numeric stats per dimension per numeric value will be generated.
    # in this example numeric stats for each HTTP method will be generated:
    # http_request_method_get_latency.[avg|min|max|p95|p99|sum], http_request_method_post_latency.[avg|min|max|p95|p99|sum], ...
    - name: "http-request-latencies"
      pattern: "] \"(?P<method>\\w+) took (?P<latency>\\d+) ms"
      dimensions: ["method"]
      # interval is 1 minute by default. The processing rule collects all captured values
      # and at the end of this interval it calculates metrics, e.g. averages, quantiles, anomaly scores for that duration.
      interval: 1m
      # retention is 3 hours by default. Anomaly scores are calculated based on the history of metrics.
      # keeping retention short makes the scores more sensitive to spikes in metric values.
      retention: 1h
      # skip_empty_intervals will skip the intervals with no match so the overall average/stddev is not affected
      skip_empty_intervals: true
      trigger_thresholds:
        anomaly_probability_percentage: 1
    # ---------------- OpenTSDB Metric Processor Example ----------------
    # this processor is used to generate metrics from OpenTSDB metric data.
    # supported metric data format is "put <metric_name> <timestamp> <value> <tags>"
    # tags are optional. This processor sends metrics as-is, no aggregation is performed.
    # other configurations in this section are not considered when open_tsdb_metric is enabled.
    - name: "otsdb-metric"
      open_tsdb_metric: true
    # ---------------- Numeric Capture Processor Examples ----------------
    - name: "flog"
      # capture matcher parsing both the status code and the response size in flog format as named numeric values.
      pattern: " (?P<statuscode>\\d+) (?P<responsesize>\\d+)$" # metric names: flog_statuscode, flog_responsesize.
      value_adjustment_rules:
        responsesize:
          operator: "/"
          operand: 1000.0 # apply division by 1000 to any responsesize match
      trigger_thresholds:
        anomaly_probability_percentage: 1
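    # Illustrative example (not part of the original config; the sample line is made up): an access log line ending in
    # "... 200 19092" would yield statuscode=200 and responsesize=19092; with the division rule above, the
    # flog_responsesize metrics are reported on the adjusted value 19.092.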
pattern: " (\\d+)$" trigger_thresholds: anomaly_probability_percentage: 1 # ---------------- Log to Metric Javascript transformation Example ---------------- # This can be used to generate metrics with javascript transformation script. # Metrics from this processor is not aggregated when aggregator is enabled. # The script should return a javascript object with following fields. (return operation is done with assigning result to log) # name: defines metric name (required) # value: defines metric value (required) # timestamp: defines metric timestamp (optional) # tags: additonal tags for the metric (optional) # tags is also a javascript object; object field name is tag key, object field value is tag value # the following example is for parsing OpenTSDB metrics with following format # put metric_name 1676026275 311 phpversion=8.0.27 host=hostname - name: "log-to-metric-js" pattern: "^put" script: | var parts = log.trim().split(" "); var tags = {}; for (var i = 4; i < parts.length; i++) { var tp = parts[i].trim().split("="); if(tp.length !== 2) { continue } tags[tp[0]] = tp[1] } var res = { name: parts[1], timestamp: parts[2], value: parts[3], tags: tags } log = res ratios: # supported trigger thresholds: anomaly_probability_percentage, upper_limit_per_interval, lower_limit_per_interval, consecutive - name: request-error-ratio # calculates ratio with following formula: failure / (failure+success) success_pattern: "request succeeded" failure_pattern: "request failed" trigger_thresholds: anomaly_probability_percentage: 50 traces: # supported trigger thresholds: max_duration, anomaly_probability_percentage, upper_limit_per_interval, lower_limit_per_interval, consecutive - name: login-trace start_pattern: "user (?P[0-9a-fA-F]{8}) logged in" finish_pattern: "user (?P[0-9a-fA-F]{8}) logged out" trigger_thresholds: max_duration: 50000 # 50 seconds security: - name: failed_login_detector # Regex to match: 53.138.44.43 - gorczany8437 [13/May/2020:20:12:03 +0000] "GET /granular/implement HTTP/2.0" 201 79206 "https://www.regionaloptimize.io/bleeding-edge/implement/matrix" "Mozilla/5.0 (Macintosh; PPC Mac OS X 10_8_9 rv:6.0; en-US) AppleWebKit/532.7.7 (KHTML, like Gecko) Version/6.2 Safari/532.7.7" pattern: (?P\S+)\s-\s+(?P\S+\s+)+\[(?P[^]]+)\]\s"(?P\S*)\s?(?P(?:[^"]*(?:\\")?)*)\s(?P[^"]*)"\s(?P\d+)\s(?P\d+)\s"(?P(?:[^"]*(?:\\")?)*)"\s"(?P.*)" detection_filter: field: status # to be used in with match pattern match_pattern: "401|403" # matching lines will be stored as offender threshold: 10 # if their match exceed given count window: 30m # in the specified window signature_field: ip # any matching elements with this signature will be filtered and transferred to configured destination top_ks: - name: top-api-requests # logs matching this pattern will be selected and named groups combined together will be the key of the record for which we keep counter. # example log to match below rule: "12.195.88.88 - joe [08/Aug/2020:05:57:49 +0000] "GET /optimize/engage HTTP/1.0" 200 19092" pattern: (?P\d+\.\d+\.\d+\.\d+) - \w+ \[.*\] "(?P\w+) (?P.+) HTTP\/\d.0" (?P.+) \d+ # every interval the top records will be reported and they will be removed locally interval: 30s # records are ordered by their count descendingly and top k items are picked for reporting. k: 10 # if a lower limit is provided only records whose count is greater than the limit will be able to make it to top k. lower_limit: 1 # separator is used to combine the named group values together to form a record key. Default is comma ','. 
# For example, # the pattern above would generate a record key like this "12.195.88.88,GET,/optimize/engage,200" # let's say this record has been seen 5 times in last period and it was one of the top k items. then below log will be reported: # "12.195.88.88,GET,/optimize/engage,200=5" separator: "," # thresholds are custom conditions that run on the generated or collected metrics # some basic thresholds can also be specified at the processor level. this is where more advanced scenarios can be covered. thresholds: # supported operators: '==', '>', '>=', '<', '<=' - name: http-latencyp95-threshold metric_name_pattern: http_request_method_.*_latency\.p95 # regex pattern operator: ">" value: 120 - name: http-avg-threshold # Either metric_name or metric_name_pattern must be provided but not both. metric_name: http_request_method_getconfig_latency.avg # exact metric name match operator: ">=" value: 50 - name: incoming-lines-threshold metric_name: incoming_lines.anomaly1 operator: ">" value: 90 - name: incoming-bytes-threshold metric_name: incoming_bytes.anomaly2 operator: ">" value: 90 - name: consecutive-bytes-threshold metric_name: incoming_bytes.anomaly2 operator: ">" value: 90 # The threshold must condition must be met this many times in a row to trigger an alert. consecutive: 5 # A multi-conditions threshold can be defined as follow # Only when all the conditions are hit an alert is triggered # metric_names must belong to same source and workflow # metric_name_regex will be supported for multi-conditions threshold in the future # A custom interval for flushing out state of the conditions (triggered/not triggered) in a multi-conditions threshold can be defined # default for this interval is 5s - name: http-requests-multiple-conditions-threshold type: and interval: 1m conditions: - metric_name: http_request_method_updateconfig_latency.avg operator: ">=" value: 100 - metric_name: http_request_method_deleteconfig_latency.max operator: ">" value: 125 consecutive: 5 # A conditional workflow triggers when the conditions are met. # Conditions are defined in `workflow_conditions` globally, and referenced # in `conditions` tag of workflows as an array of condition names # (please refer to `workflows.conditional-workflow.conditions`) # Supported conditions: # - on_demand: triggered by an event posted to /on_demand API endpoint # - source_version_change: triggered by the ed agent when a change is detected in source version workflow_conditions: - name: alert_on_my_service type: on_demand # all outputs are defined in this section. # each one must have a unique name within outputs section. 
outputs: streams: # statistics calculated by edgedelta agents can be sent to streaming endpoints - name: '{{ Env "TEST_SUMO" "sumo-us" }}' type: sumologic endpoint: "https://endpoint4.collection.us2.sumologic.com/receiver/v1/http/XYZ" custom_tags: "app": "transaction_manager" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" # One can use either curly braces for templatizing (which has limited support for only direct variable replacement and index function) or square braces where one can employ full power of Go's template language "Host": "[[ .Host ]]" "Source": "[[ .Source ]]" "SourceType": "[[ .SourceType ]]" "Tag": "[[ .Tag ]]" - name: sumo-us-2 type: sumologic endpoint: '{{ Env "EMPTY" "https://endpoint4.collection.us2.sumologic.com/receiver/v1/http/XYZ" }}' # send_as_json can be used to send data to Sumologic in JSON format instead of the usual cost-optimized format (JSON format is easier to use/consume) send_as_json: true - name: datadog-default type: datadog api_key: '{{ Env "TEST_DD_APIKEY" }}' custom_tags: "app": "transaction_manager" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: datadog-custom type: datadog # If provided, custom installation of datadog log host can be reached. log_host: "" # If provided, custom installation of datadog metric host can be reached. metric_host: "" # If provided, custom installation of datadog event host can be reached. 
event_host: "" api_key: '{{ Env "TEST_DD_APIKEY" }}' features: metric custom_tags: "app": "starbucks_pos_transaction_manager" "region": "us-west-2" - name: datadog-alert-as-log type: datadog api_key: '{{ Env "TEST_DD_APIKEY" }}' features: metric, alert, edac alert_as_log: true # this indicates the alert will be sent as a log instead of event by default - name: datadog-buffered-output type: datadog api_key: '{{ Env "TEST_DD_APIKEY" }}' features: metric, alert, edac # when buffer path is set to a folder path, failed streaming data is temporarily stored in this folder to be retried until succeeds or ttl is reached # if the specified path does not exist, agent will create directories as necessary buffer_path: /var/log/edgedelta/pushbuffer/ # default ttl is 10m buffer_ttl: 2h # when buffer max size is set, failed streaming data will no longer be persisted after max size is reached buffer_max_bytesize: 100MB - name: newrelic type: newrelic log_host: "" metric_host: "" event_host: "" api_key: '{{ Env "TEST_NR_APIKEY" }}' account_name: '{{ Env "TEST_NR_ACCOUNT_ID" }}' features: metric,alert alert_as_log: true alert_as_event: true custom_tags: "app": "starbucks_pos_transaction_manager" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: honeycomb type: honeycomb host: "" #Host is the optional and default is "api.honeycomb.io" api_key: '{{ Env "TEST_HC_APIKEY" }}' dataset_name: "" unpacking: false features: metric,log,edac custom_tags: "app": "starbucks_pos_transaction_manager" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: appdynamics type: appdynamics host: "" #Host is the optional and default is "analytics.appdynamics.com" global_account_name: "global123" api_key: "12345" schema_name: "" features: metric,log,edac custom_tags: "app": "starbucks_pos_transaction_manager" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": 
"{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: elastic-cloud type: elastic index: "index name" # cloud_id: "ed-stage-deployment:dXMtd2VzdC0yLmF3cy5mb3VuZC5pbyRmb28kYmFy" token: "" # features is a comma separated string that contains the list of datasets which are enabled for this streaming destination. # all streaming destinations support features field but not all of them support full list of datasets. e.g. some destinations only support metrics # possible dataset types: metric (all kind of metrics), cluster (patterns and samples), context (contextual logs), log (forwarding all logs) # if features is unset default will be "metric,cluster,context" features: metric,cluster,context - name: elastic-local type: elastic index: "index name" user: elastic password: '{{ Env "TEST_ELASTIC_PWD" }}' address: - elasticnode1 - name: elastic-send-as-is type: elastic index: "index name" user: elastic password: '{{ Env "TEST_ELASTIC_PWD" }}' address: - elasticnode1 features: edac # send_as_is indicates the string typed data (edac, log) should be send as is without wrapping with a json object. send_as_is: true # edac_enrichment defines which edac related fields should be added to the original JSON payload just before pushing. # This only applies to elastic streams and when send_as_is=true. edac_enrichment: edac_id_field: "edac_id" # final JSON will have an additional "edac_id" field whose value will be the EDAC Id metric_name_field: "name" # final JSON will have an additional "name" field (or will be overriden if name exists) whose value is metric name - name: elastic-opensearch-with-rolearn type: elastic index: "index name" # region is used for AWS ElasticSearch (OpenSearch) region: "us-west-2" # role_arn is used if assuming an aws iam role. role_arn: "arn:aws:iam:::role/" # external_id is optional, it is a unique identifier to avoid confused deputy attack. 
# ref: https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html external_id: "external_id" address: - opensearch_domain_endpoint custom_tags: "app": "test" # app and region defined in agent_settings can be referred here "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" "logical_source": '{{ index .CustomTags "logicalSource" }}' # tags coming from custom sources can be referred in that way (also logical source field) "url": '{{ index .ObservationTags "url" }}' # dimensions from a dimension regex processor can be referred here "cluster": '{{ index .ObservationTags "cluster" }}' # or other tags that are specified via ED_CUSTOM_TAGS=cluster:abc can be referred here "level": '{{ index .ObservationTags "level" }}' # this level tag comes from log-level-detector filter - name: elastic-opensearch type: elastic index: "index name" region: "us-west-2" address: - opensearch_domain_endpoint - name: elastic-apm type: elastic index: "index name" user: elastic password: '{{ Env "TEST_ELASTIC_PWD" }}' address: - elasticnode1 features: log # For the remaining two field below, it is required to have "buffered-elastic-apm-processor" in the workflow apm_server_url: http://localhost:8200 apm_token: test-token - name: elastic-send-as-is-with-options type: elastic index: "index name" region: "us-west-2" role_arn: "arn:aws:iam:::role/" external_id: "external_id" address: - opensearch_domain_endpoint features: log send_as_is: true # send_as_is_options used for JSON messages, and it is applicable only send_as_is is set send_as_is_options: # nest_under can be used to nest all the content of JSON log under a custom field. # e.g. a log message like this: {pid: 1223, pname: os_stat_check} # would be sent to elastic like this: {tag: "prod", src_type: "File",..., msg.pid: 1223, msg.pname: os_stat_check} # Top level fields are ED metadata fields and msg.* contains the log JSON nest_under: msg # include_ed_metadata is used to send all ed metadata fields at top level fields in JSON, default is false include_ed_metadata: true # on_failure_options is used for the incoming raw data not in JSON format. # In this case a JSON object is created and raw message is populated under "sub_field_name". on_failure_options: sub_field_name: "nested_field" - name: elastic-disable-verify-certificate type: elastic index: "index name" user: elastic password: '{{ Env "TEST_ELASTIC_PWD" }}' address: - elasticnode1 tls: # Optional to disable a TLS verification of a certificate. Default is false. 
disable_verify: true - name: edport-with-endpoint type: edport endpoint: "http://localhost:4545/v0/collect" - name: edport-with-endpoint-and-schema type: edport endpoint: "http://localhost:4545/v0/collect" schema: FlattenedObservation - name: edport-with-tcp type: edport host: "log-repo-host" port: 23131 pool_size: 1 - name: edport-with-tcp-and-schema type: edport host: "log-repo-host" port: 23131 schema: FlattenedObservation - name: cloudwatch type: cloudwatch region: us-west-2 log_group_name: test_log_group log_stream_name: test_log_stream # monitored container can override the default values of log group name, logs stream name and log stream prefix, by setting ed_log_group_name, ed_log_stream_name, ed_log_stream_prefix labels # Default value is false. allow_label_override: true # Default value is false. auto_create: true - name: cloudwatch-prefix type: cloudwatch region: us-west-2 log_group_name: test_log_group # CloudWatch log stream prefix (either name or prefix is supported not both) log_stream_prefix: ecs # monitored container can override the default values of log group name, logs stream name and log stream prefix, by setting ed_log_group_name, ed_log_stream_name, ed_log_stream_prefix labels features: log - name: cloudwatch-auto type: cloudwatch region: us-west-2 # only supported for ECS environments, and when provided only region configuration can be provided. Automatically create; # LogGroupName in the format of /ecs/task_definition_family # LogsStreamPrefix in the format of ecs/container_name/task_id # Default value is false. auto_configure: true auto_create: true features: log - name: cloudwatch-assumes-role type: cloudwatch region: us-west-2 log_group_name: test_log_group log_stream_name: test_log_stream # role_arn is used for assuming an iam role. To see how it works ref: https://docs.aws.amazon.com/IAM/latest/UserGuide/tutorial_cross-account-with-roles.html role_arn: "arn:aws:iam:::role/" # external_id increases the security of the role by requiring an optional external identifier, which prevents "confused deputy" attacks. external_id: "053cf606-8e80-47bf-b849-8cd1cc826cfc" - name: app-insights type: azure # application insight connection string can be grabbed from azure portal which contains both endpoint and instrumentation key endpoint: https://dc.services.visualstudio.com/v2/track # api_key corresponds to app insight's instrumentation key api_key: '{{ Env "TEST_AZURE_APIKEY" }}' - name: app-insights-with-base-type type: azure endpoint: https://dc.services.visualstudio.com/v2/track api_key: '{{ Env "TEST_AZURE_APIKEY" }}' # BaseType is for Azure pusher and can be: "MessageData": tracing index, "EventData": event index base_type: EventData - name: signalfx type: signalfx endpoint: https://ingest.us1.signalfx.com/v2 token: '{{ Env "TEST_SIGNALFX_TOKEN" }}' features: metric - name: humio type: humio endpoint: http://localhost:8080 token: '{{ Env "TEST_HUMIO_TOKEN" }}' features: log - name: loggly type: loggly endpoint: https://logs-01.loggly.com token: token12345 features: log grouped_events: true # it will enable grouping event feature for loggly, meaning that one payload per observation group will be generated. "events" field will be used instead of "event". 
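# Illustrative example for allow_label_override on the "cloudwatch" output above (a minimal sketch with
# hypothetical names, not part of this config): a monitored container started with
#   docker run --label ed_log_group_name=/apps/billing --label ed_log_stream_name=billing-1 billing-image
# would be expected to have its logs routed to log group /apps/billing and stream billing-1 instead of
# test_log_group/test_log_stream, since allow_label_override is set to true on that output.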
- name: graylog type: graylog host: "localhost" port: 5555 features: metric,log,health,alert,event custom_tags: "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: kafka type: kafka endpoint: localhost:2888,localhost:3888 # brokers topic: example_kafka_topic required_acks: 10 batch_size: 1000 batch_bytes: 10000 batch_timeout: 1m async: true max_message_size: 10KB features: log,metric send_as_is: true # Possible for only string values tls: disable_verify: true ca_file: /var/etc/kafka/ca_file ca_path: /var/etc/kafka crt_file: /var/etc/kafka/crt_file key_file: /var/etc/kafka/keyfile key_password: p@ssword123 client_auth_type: noclientcert # possible selections: noclientcert, requestclientcert, requireanyclientcert, verifyclientcertifgiven, requireandverifyclientcert sasl: username: kafka_username password: p@ssword123 mechanism: PLAIN # possible selections: PLAIN, SCRAM-SHA-256, SCRAM-SHA-512 custom_tags: "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: influxdb-integration type: influxdb endpoint: "https://influxdb..com/" token: YOUR_API_TOKEN # empty version or version 2.x requires bucket and organization info bucket: testbucket organization: yourorganization port: 443 - name: influxdb-integration-v1x type: influxdb version: 1.x endpoint: "https://influxdb..com/" token: YOUR_API_TOKEN port: 443 http_user: admin http_password: your_http_password # version 1.x requires db info db: "specific_influxdb_database" - name: scalyr-integration type: scalyr endpoint: "https://app.scalyr.com/api/uploadLogs?token={scalyr log access write key}" # integrations can be referred in the config via integration_name. # if no name override is provided then integration_name value should be used in the workflow destinations. # rest of the fields are overridable - integration_name: orgs-splunk name: edac-splunk-dest features: edac index: edac-index - integration_name: orgs-splunk name: metric-splunk-dest features: metric index: metric-index - name: my-splunk type: splunk # 2 different types of endpoint for Splunk can be configured here # "services/collector/event": with this endpoint ED agent will send data in a cost-optimized format # "services/collector/raw": using this endpoint ED agent will send data in ED-customized format with lots of details endpoint: "://:/" token: "32-character GUID token" custom_tags: "app": "test" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: my-splunk-disable-verify-certificate type: splunk endpoint: "://:/" token: "32-character GUID token" tls: # Optional to disable a TLS verification of a certificate. Default is false. 
disable_verify: true - name: wavefront-integration type: wavefront endpoint: "https://{your wavefront domain}.wavefront.com/report" token: "" # metrics will be grouped under this name at wavefront, default value is ~edgedelta root_metric_name: "~edgedelta" # when set false, metrics sent to wavefront do not contain agent tag in their name. default value is true include_agent_tag_in_metric_name: false - name: logzio type: logzio endpoint: "https://app-eu.logz.io:8071" token: "" metric_token: "" custom_tags: "app": "starbucks_pos_transaction_manager" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: fluentd-log-fwd type: fluentd host: log-repo-host port: 23131 encoder: msgpack pool_size: 10 # tag_prefix; agent setting tag value is appended to this prefix # and used as fluentd forward tag (the payload itself will still have edgedelta_tag=agentsettings.tag) # tag_prefix is only used as fluentd tag if the corresponding data doesn't have a tag defined in enrichments tag_prefix: "tail.ed." features: log # when buffer path is set to a folder path, failed streaming data is temporarily stored in this folder to be retried until succeeds or ttl is reached # if the specified path does not exist, agent will create directories as necessary buffer_path: /var/log/edgedelta/pushbuffer/ # default ttl is 10m buffer_ttl: 2h custom_tags: "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" # when buffer max size is set, failed streaming data will no longer be persisted after max size is reached buffer_max_bytesize: 100MB - name: loki-integration type: loki endpoint: "https://localhost:3000/loki/api/v1/push" api_key: "api_key" user: "user" custom_tags: "app": "test" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" message_template: "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": 
"{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: loki-send-alert-as-log type: loki endpoint: "https://localhost:3000/loki/api/v1/push" api_key: "api_key" user: "user" custom_tags: "app": "test" "region": "us-west-2" message_template: "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" alert_as_log: true # this indicates the alert will be sent as a log instead of event by default - name: dynatrace type: dynatrace log_endpoint: "https://{your-environment-id}.live.dynatrace.com/api/v2/logs/ingest" metric_endpoint: "https://{your-environment-id}.live.dynatrace.com/api/v2/metrics/ingest" token: "" custom_tags: "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: my-otlp-destination type: otlp host: localhost port: 8686 # Currently logs and metrics are just supported for pass-thru use case. # For traces we actually have trace optimizer features: log # traces, logs and metrics are considered as log - name: eventhub-stream type: eventhubstream endpoint: "https://namespace.servicebus.windows.net/hub/messages" token: "azure-ad-token" - name: generic-edport-stream-https type: edport endpoint: https://test.com/catcher schema: FlattenedObservation pool_size: 5 features: log - name: generic-edport-stream-tcp type: edport host: log-repo-host port: 23131 pool_size: 5 features: log,edac,metric custom_tags: "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: cribl-http type: cribl endpoint: http://in.logstream..cribl.cloud:10080/crible/_bulk token: "" features: log,edac,metric,alert - name: my-openmetrics type: openmetrics endpoint: "http://localhost:8428/metrics" features: metric custom_tags: "app": "test" "region": "us-west-2" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "ConfigID": "{{.ConfigID}}" "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: my-s3-streamer type: s3stream aws_key_id: '{{ Env "AWS_KEY_ID" }}' aws_sec_key: '{{ Env "AWS_SECRET_KEY" }}' bucket: testbucket region: us-east-2 flush_interval: 30s # Default is 3 minutes flush_bytesize: 1M # Having a byte size parameter means that if the given buffer reachs given threshold, it will be flushed even if flush interval is not yet elapsed custom_tags: "Host": "{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" - name: my-observeinc type: observeinc endpoint: "http://localhost:5555" features: metric,log,health,alert,event custom_tags: "Host": 
"{{.Host}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" # GCP cloud monitoring (stackdriver) is only supported by metrics now. - name: my-cloudmonitoring type: cloudmonitoring project_id: edgedelta key: '{{ Env "CLOUDMONITORING_KEY" }}' features: metric triggers: # anomaly captures can be sent to trigger endpoints such as slack. - name: error-anomaly-slack type: slack endpoint: "" # suppression_window can be configured to avoid duplicate alerts. It defaults to 20 minutes when unset. suppression_window: 60m # supported suppression modes: 'local' and 'global'. default mode is local. # local would suppress if there was an alert fired for same source+rule from this agent # global would suppress if there was an alert fired for source+rule from any agent that shares same tag/configid suppression_mode: global # notify_content is optional way to customize the notification content. It supports templating. # available template fields: EDAC, Source, SourceType, MetricName, Tag, Host, ConfigID, Timestamp... # important notes about templates you should read before use: # - if the value is empty the item will not be sent to slack # - the keys are sorted alphabetically before sending to slack so they will not appear in the order specified in the config # Users can also use Sprig (http://masterminds.github.io/sprig/) functions over here notify_content: title: "Anomaly Detected: {{.ProcessorDescription}}" # disable_default_fields is used for disabling default fields in notify message. disable_default_fields: false # advanced_content provides full flexibility to defined the payload in slack notification post requests # important notes about advanced_content template should read before use; # - overides all other settings, if custom_fields, title or disable_default_fields are provided then they will be ignored. 
# - same set of templating fields are supported as in custom_fields # - author is responsible to make sure validity of the json # - use block kit builder tool provided by slack https://app.slack.com/block-kit-builder prior to test advanced_content: | { "blocks": [ { "type": "section", "text": { "type": "mrkdwn", "text": "*Raw POST Anomaly Detected: [[ .ProcessorDescription ]]*" } }, { "type": "section", "text": { "type": "mrkdwn", [[ if contains .MatchedTerm "error" ]] "text": "*MatchedTerm* [[ .MatchedTerm ]]\n*ConfigID* [[ .ConfigID ]]" [[ else ]] "text": "*ERROR MatchedTerm* [[ .MatchedTerm ]]\n*ConfigID* [[ .ConfigID ]]" [[ end ]] } } ] } custom_fields: "Dashboard": "https://app.edgedelta.com/investigation?edac={{.EDAC}}×tamp={{.Timestamp}}" "Current Value": "{{.CurrentValue}}" "Threshold Value": "{{.ThresholdValue}}" "Custom Message": "{{.CurrentValue}} exceeds {{.ThresholdValue}}" "Built-in Threshold Description": "{{.ThresholdDescription}}" "Matched Term": "{{.MatchedTerm}}" "Threshold Type": "{{.ThresholdType}}" "File Path": "{{.FileGlobPath}}" "K8s PodName": "{{.K8sPodName}}" "K8s Namespace": "{{.K8sNamespace}}" "K8s ControllerKind": "{{.K8sControllerKind}}" "K8s ContainerName": "{{.K8sContainerName}}" "K8s ContainerImage": "{{.K8sContainerImage}}" "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}" "ECSCluster": "{{.ECSCluster}}" "ECSContainerName": "{{.ECSContainerName}}" "ECSTaskVersion": "{{.ECSTaskVersion}}" "ECSTaskFamily": "{{.ECSTaskFamily}}" "DockerContainerName": "{{.DockerContainerName}}" "SourceAttributes": "{{.SourceAttributes}}" "ConfigID": "{{.ConfigID}}" "EDAC": "{{.EDAC}}" "Epoch": "{{.Epoch}}" "Host": "{{.Host}}" "MetricName": "{{.MetricName}}" "Source": "{{.Source}}" "SourceType": "{{.SourceType}}" "Tag": "{{.Tag}}" # organization level integrations can be referred via their name. in the example below "ed-alert-slack" is an org level slack integration - integration_name: ed-alert-slack suppression_window: 30m suppression_mode: local notify_content: advanced_content: | { "blocks": [ { "type": "section", "text": { "type": "mrkdwn", "text": "*{{.Tag}}: {{.ThresholdDescription}}*" } }, { "type": "section", "text": { "type": "mrkdwn", "text": "*MatchedTerm* {{.MatchedTerm}}" } } ] } # anomaly captures can be sent to generic webhook endpoints with custom JSON payload such as pagerduty/moogsoft - name: pagerduty-integration type: pagerduty endpoint: https://api.pagerduty.com/incidents custom_headers: Accept: "application/vnd.pagerduty+json;version=2" Content-Type: "application/json" Authorization: "Token token=XXXXXXXXXX" # pagerduty API token From: "user@edgedelta.com" # user must be a member of the pagerduty account used notify_content: advanced_content: | { "incident": { "type": "incident", "title": "{{.Tag}} {{.Host}} Disk usage at 85%", "service": { "id": "", "type": "service_reference" }, "body": { "type": "incident_body", "details": "A disk is getting full on this machine. You should investigate what is causing the disk to fill, and ensure that there is an automated process in place for ensuring data is rotated (eg. logs should have logrotate around them). If data is expected to stay on this disk forever, you should start planning to scale up to a larger disk." 
} } } - name: error-anomaly-moogsoft-webhook type: webhook endpoint: "localhost" # basic auth username username: user1 # basic auth password password: 12345 custom_headers: header1: value1 # JSON template variables available: Tag, Host, MetricName, Source, SourceType, EDAC, Epoch, ConfigID # Templating only applies to string fields. # For moogsoft mapping check: # https://docs.moogsoft.com/AIOps.6.5.0/Alerts_Overview.html # https://docs.moogsoft.com/en/webhook.html # TODO: IP or location for source_id and agent_location payload: signature: "{{.MetricName}}" source_id: "{{.Host}}" external_id: "{{.EDAC}}" manager: "edgedelta" source: "{{.Host}}" class: "application" agent_location: "{{.Host}}" type: "{{.SourceType}}" severity: 3 description: "high network utilization in application A" agent_time: "{{.Epoch}}" - name: moogsoft-default type: moogsoft endpoint: "localhost" api_key: "moogsoft-apikey" notify_content: custom_fields: "jira-ticket": "ticket" # default payload taken from RemdeyAPI Helix Document, might be subject to change. # https://docs.bmc.com/docs/itsm2102/example-of-using-the-rest-api-to-create-an-incident-entry-974495996.html - name: remedy-default type: remedy endpoint: "localhost" token: remedy-token notify_content: custom_fields: "test-field": "test" custom_headers: X-header1: "test-header" - name: bigpanda-default type: bigpanda endpoint: "https://api.bigpanda.io/data/v2/alerts" token: panda-token app_key: panda-app-key notify_content: custom_fields: "test-field": "test" custom_headers: X-header1: "test-header" - name: eventhub-test type: eventhub endpoint: https://eventshub-test.servicebus.windows.net/test/messages token: "test-token" notify_content: custom_fields: "test-field": "test" custom_headers: X-header1: "test-header" - name: email type: webhook endpoint: "https://api.edgedelta.com/v1/orgs/-/triggers/emails" suppression_window: 30m suppression_mode: local # Custom headers and payload support templating. # Template variables available: Tag, Host, MetricName, Source, SourceType, EDAC, Epoch, ConfigID custom_headers: X-ED-Config-Id: "{{.ConfigID}}" # Payload is a dictionary of key value pairs. Values can be string, numbers or lists. # Only string valued fields support templating. # Payload will be converted to a json and sent in the body of the request. payload: RequestID: "{{.ConfigID}}-{{.Epoch}}" ConfigID: "{{.ConfigID}}" Recipients: ["taylan@edgedelta.com"] Subject: "Anomaly Detected - EDAC [{{.EDAC}}]" Body: |

Edgedelta found an anomaly!

EDAC: {{.EDAC}}
Tag: {{.Tag}}
Host: {{.Host}}
Source: {{.Source}}
MetricName: {{.MetricName}}
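# For illustration only, with hypothetical values the templated email Body above would render roughly as:
#   Edgedelta found an anomaly!
#
#   EDAC: 1a2b3c4d5e6f
#   Tag: prod
#   Host: host-1
#   Source: billing-dashboard
#   MetricName: error.anomaly1
# The payload dictionary itself is converted to JSON and sent in the request body, as noted above.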
- name: microsoft-teams-integration type: teams endpoint: "https://outlook.office.com/webhookb2/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" notify_content: title: "Anomaly Detected: {{.ProcessorDescription}}" disable_default_fields: false custom_fields: "Dashboard": "https://app.edgedelta.com/investigation?edac={{.EDAC}}×tamp={{.Timestamp}}" "Current Value": "{{.CurrentValue}}" "Threshold Value": "{{.ThresholdValue}}" "Custom Message": "{{.CurrentValue}} exceeds {{.ThresholdValue}}" "Matched Term": "{{.MatchedTerm}}" - name: jira-integration type: jira endpoint: "https://automation.codebarrel.io/pro/hooks/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" notify_content: advanced_content: | { "data": { "title": "{{.Tag}}", "message": "{{.MatchedTerm}}", } } - name: aws-lambda-integration type: awslambda endpoint: "https://XXXXXXXXXX.execute-api.XXXXXXXXX.amazonaws.com/XXXX/XXXXXX" notify_content: advanced_content: | { "foo": "bar", } - name: azure-functions-integration type: azurefunctions endpoint: "https://XXXXXXXXXX.azurewebsites.net/XXXX/XXXXXX" notify_content: advanced_content: | { "foo": "bar", } - name: victorops-integration type: victorops endpoint: "https://api.victorops.com/api-public/v1/incidents" # https://portal.victorops.com/dash/edgedelta#/api-management # Use the "api id" together with an "api key" when making API calls. custom_headers: X-VO-Api-Id: "api-id" X-VO-Api-Key: "api-key" notify_content: advanced_content: | { "summary": "{{ .Title }} - {{ .Message }}", "details": "https://app.edgedelta.com/investigation?edac={{.EDAC}}×tamp={{.Timestamp}}", "userName": "username", "targets": [ { "type": "EscalationPolicy", "slug": "team-xxxxxxx" } ], "isMultiResponder": false } # Archive destinations are specified in this section. There can only be at most one workflow with archive destinations # Raw logs are compressed and sent periodically when one of these conditions hit: # - Compressed logs size reaches 16MB (configurable via agent_settings.archive_max_byte_limit) # - 30 minutes passes (configurable via agent_settings.archive_flush_interval) # # The destination bucket will receive compressed logs as gzip file every 5min (or more frequent on high volume environments). Max file size is 16MB. # The gzip files have json lines containing raw log and other source attributes # folder path format: {bucket}/{year}/{month}/{day}/{hour}/{tag}/{host}/{random id}.log.gz # example: testbucket/2021/05/14/21/prod/host-1/1sXpYZPs83808oGZPJExJXlLXrb.log.gz archives: - name: my-s3 type: s3 aws_key_id: '{{ Env "AWS_KEY_ID" }}' aws_sec_key: '{{ Env "AWS_SECRET_KEY" }}' bucket: testbucket region: us-east-2 # This s3 archiver is getting creds by assuming a role which is already created by the customer. ref:https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html - name: my-s3-assumes-role type: s3 # role_arn is used for assuming an iam role. To see how it works ref: https://docs.aws.amazon.com/IAM/latest/UserGuide/tutorial_cross-account-with-roles.html role_arn: "arn:aws:iam::1234567890:role/ed-s3-archiver-role" # external_id increases the security of the role by requiring an optional external identifier, which prevents "confused deputy" attacks. 
external_id: "053cf606-8e80-47bf-b849-8cd1cc826cfc" bucket: testbucket region: us-east-2 - name: my-s3-archiver type: s3 aws_key_id: '{{ Env "AWS_KEY_ID" }}' aws_sec_key: '{{ Env "AWS_SECRET_KEY" }}' bucket: testbucket region: us-east-2 # This flag will disable metadata file ingestion (which is used for rehydration analysis). It is useful for AWS Athena where only data files should be present in the given S3 bucket + path prefix disable_metadata_ingestion: true # This feature should be used with another agent (like aggregator) that is started with ED_AGENT_MODE=archiver and ED_ARCHIVER_PORT=. # With this approach, this integration will forward its logs to this agent running as archiver (which will decrease decoupling) # Keep in mind that, this feature is still in alpha stage archiver: endpoint: http://localhost:5555 # Default path prefix is /////, however with this field this can be overrided path_prefix: # One can refer to "Year", "Month", "Day", " Minute", "Hour", "Tag", # "Host", "OtherTags." and "LogTags." # For ECS, "ECSCluster", "ECSContainerName", "ECSTaskFamily" and "ECSTaskVersion" are available # For K8s, "K8sNamespace", "K8sControllerKind", "K8sControllerLogicalName", "K8sPodName", "K8sContainerName" and "K8sContainerImage" are available # For Docker, "DockerContainerName" and "DockerImageName" are available order: - Year - Month - Day - Hour - 5 Minute - OtherTags.role # Format should have exactly same amount of "%s"s with "order"s count and templating will be done in the same order given via "order" fields # Curly braces are prohibited # Also, note that this format is not supported in rehydrations so the source for rehydration cannot be an integration using custom path_prefix format # This should be used for some Big Data applications such BigQuery, AWS Athena etc. format: year=%s/month=%s/day=%s/hour=%s/minute=%s/role=%s/ # GCS destination authentication is done using HMAC keys for service accounts. # See https://cloud.google.com/storage/docs/authentication/managing-hmackeys for details on how to create new keys - name: my-gcs type: gcs hmac_access_key: my_hmac_access_key_123 hmac_secret: my_hmac_secret_123 bucket: ed-test-bucket # Azure blob archiver - name: my-blob type: blob account_name: '{{ Env "BLOB_ACCOUNT_NAME" }}' account_key: '{{ Env "BLOB_ACCOUNT_KEY" }}' container: testcontainer auto_create_container: false # Minio - name: my-minio type: minio access_key: my_access_key_123 secret_key: my_secret_key_123 endpoint: play.minio.com:9000 bucket: ed-test-bucket-minio disable_ssl: true # Force archive destination to use {endpoint}/{bucket} format instead of {bucket}.{endpoint}/ when reaching buckets. s3_force_path_style: true encoding: parquet # supported ones: json, parquet compression: zstd # supported ones: gzip, zstd, snappy, uncompressed # For parquet compression type, this selection will activate only-data compression. However, it will make ingested data non-rehydration compliant. # This is useful to query the log buckets via datawarehouse tools such as Athena/BigQuery. 
use_native_compression: true - name: my-minio-https type: minio access_key: my_access_key_123 secret_key: my_secret_key_123 endpoint: play.minio.com:9000 bucket: ed-test-bucket-minio s3_force_path_style: true encoding: parquet compression: zstd use_native_compression: true tls: disable_verify: true # Digital Ocean Spaces - name: my-digitalocean-spaces type: dos endpoint: nyc3.digitaloceanspaces.com bucket: ed-test-bucket-dos access_key: my_access_key_123 secret_key: my_secret_key_123 # IBM Object Storage - name: my-ibm-object-storage type: ibmos endpoint: s3-api.us-geo.objectstorage.softlayer.net bucket: ed-test-bucket-ibm access_key: my_access_key_123 secret_key: my_secret_key_123 # Zenko CloudServer - name: my-zenko-cloudserver type: zenko endpoint: https://XXXXXXXXXX.sandbox.zenko.io bucket: ed-test-bucket-zenko access_key: my_access_key_123 secret_key: my_secret_key_123 - name: local-archive type: localstorage mounted_path: "/test/path/i/can/write" # version the config so we can make breaking changes version: v2 # agent internal settings agent_settings: tag: prod log: level: debug secure_logging: true # If enabled, some of the sensitive information will be redacted (can affect performance of the agent) # soft_cpu_limit is only honored by the clustering processor at the moment. 0.5 means 50% of a core. # it can be enabled by setting cpu_friendly=true in the clustering rule. soft_cpu_limit: 0.5 # When anomaly_tolerance is non-zero, anomaly scores handle edge cases better when stddev is too small. Default is 0.01. Can be set at rule level for some rule types. anomaly_tolerance: 0.1 # Anomaly scores will not be calculated for the first 1m after a source is found. Default is 30m. Can be set at rule level for some rule types. anomaly_confidence_period: 1m # Skips empty intervals when rolling so the anomaly scores are calculated based on the history of non-zero intervals. Default is true. Can be set at rule level for some rule types. skip_empty_intervals: false # Only report non-zero stats. Default is true. Can be set at rule level for some rule types. only_report_nonzeros: false # Anomaly capture size is used for defining the logging buffer's size in terms of number of logs. Default is 100. anomaly_capture_size: 1000 # Anomaly capture byte size is used for defining the logging buffer's limiting byte size. Default is empty, meaning that this limit does not apply. anomaly_capture_bytesize: "10 KB" # Anomaly capture duration is used for defining the logging buffer's limiting duration (for example, the last 10 minutes of logs). Default is empty, meaning that this limit does not apply. anomaly_capture_duration: 1m # Anomaly capture centered is used for flushing the logs AFTER anomaly generation; anomaly capture duration should also be set to use that feature. Logs after the anomaly capture duration will be flushed if it is set to true. anomaly_capture_centered: true # Anomaly coefficient is used to multiply the final score into the [0, 100] range. The higher the coefficient the higher the anomaly score will be. Default is 10. Can be set at rule level for some rule types. anomaly_coefficient: 10.0 # capture_flush_mode sets the behavior of flushing captured contextual log buffers. Supported modes are listed below # local_per_source: This is the default mode. The captured buffer of a source is flushed when there's a local alert being triggered from the same source. # local_all: This is the mode where all captured buffers are flushed when there's a local alert being triggered (not necessarily from the same source).
So in this mode whenever an alert is triggered from agent all capture buffers from all active sources will be flushed # tag_per_source: This is the mode where captured buffer of a source is flushed when there's an alert from same source and tag (from any agent within current tag) # tag_all: This is the mode where all captured buffers on all agents within the same tag is flushed whenever any of the agents trigger an alert # custom_local_per_group: This is the mode where users can specify groups of sources to flush together when there is a local alert being triggered from one of the sources. capture_flush_mode: custom_local_per_group # specify the custom behavior for capture flush modes starting with custom capture_flush_custom: # to specify groups of input labels to flush together label_grouping: # the names/keys group1,group2 does not matter in terms of behavior. They are just names of separate groups group1: [billing, docker] group2: [errorcheck, google_pubsub_with_key] # mode to use if triggered source label does not belong to any group # optional: default is local_per_source fallback_mode: local_all # ephemeral indicates that this agent's corresponding tailers can be temporarily down (for example due to scale down scenario) and it will be used for agent down scenarios ephemeral: true # persisting_cursor_settings defines persisting cursor locations which is suitable for environments that doesn't want to miss any data during agent restart process # path is the folder place where we create our cursor file and flush_interval is the interval that we will save to this file from memory. persisting_cursor_settings: path: /var/edgedelta/pos file_name: cursor_file.json flush_interval: 1m # attributes key defines user-defined key-value pairs that will be used to designate running agent from others # these key/value pairs are attached to the data collected/generated by the agent and sent to streaming destinations # currently, "environment", "app" and "region" keywords are supported attributes: environment: prod app: smp region: us-west # multiline_max_size defines the multiline buffer size in length. Increase this maximum line number for overflow cases. # in overflow cases all buffered lines dumped as single line, so for these environments it is better to increase this value # integer type multiline_max_size: 250 # multiline_max_bytesize defines the multiline buffer size in bytes. Increase this maximum byte limit for overflow cases. # in overflow cases all buffered lines dumped as single line, so for these environments it is better to increase this value # datasize.Size type multiline_max_bytesize: "10 KB" # max_file_per_glob_path defines the number of maximum file to tail per glob path. Default value is 100 max_file_per_glob_path: 100 # forget_file_after defines the duration after which files will be dropped if there is no change to it. Default value is 1h forget_file_after: 1h # total_seek_capacity defines the maximum size that all tailers can seek concurrently. Default value is 5MB total_seek_capacity: "5 MB" # max_seek_size defines the maximum size that a single tailer can seek per second. Default value is total_seek_capacity * 0.8 max_seek_size: "4 MB" # source_discovery_interval defines the frequency interval that source discovery is invoked. Default value is 5s source_discovery_interval: 5s # file_tailer_buffer_size defines the maximum number of logs a file tailer can store in-memory until they are ingested to agent's internal router. 
# If router is busy to pick up the logs and the tailer's buffer gets filled then seeking will be blocked. # Default buffer size is 1000 file_tailer_buffer_size: 1000 # router_per_source_buffer_size defines the maximum number of logs agent's internal router can store in-memory per source. Default value is 1000 router_per_source_buffer_size: 1000 # archive_flush_interval defines the interval at which logs are flushed and send to archive destination. Default value is 30m archive_flush_interval: 5m # archive_flush_on_edac indicates whether to flush archive logs to target s3/blob destination when an edac occurs. Default is false. archive_flush_on_edac: false # archive_flush_on_metric_finding indicates whether to flush archive logs to target s3/blob destination when a metric finding is generated. Metric findings are generated by metric alerts that can be configured in the app via Metrics page. Default is true. archive_flush_on_metric_finding: true # archive_max_byte_limit defines the maximum bytes to buffer in memory until triggering an archive flush. # When either archive_flush_interval or archive_max_byte_limit is reached agent flushes the buffered raw logs to configured archive destination(s). # Default byte size limit is 16MB archive_max_byte_limit: "16MB" # source_detection_eviction_period sets the period when a source that is created by source detector will be evicted after no use # Default is 15m source_detection_eviction_period: 15m # When checked, agent(s) will start sending agent related stats (such as CPU and memory consumptions) agent_stats_enabled: true # When checked, agent(s) will expose internal metrics (such as incoming lines, outgoing bytes) in prometheus format on /metrics endpoint # port can be specified with PROM_PORT. Default port is 8087 if not specified. internal_prom_stats_enabled: true # When checked, agent(s) will expose rule metrics (metrics generated from regex processors) in prometheus format on /metrics endpoint # port can be specified with PROM_PORT. Default port is 8087 if not specified. rule_metrics_prom_stats_enabled: true # max_incomplete_line_buffer_size defines maximum data that can be kept in buffered line separator. Default value is around 10KB # This is usefull when we receive json formatted and large inputs. # line_pattern option can be used to separate inputs into valid json objects and when a single line is larger than 10KB this option should be used. max_incomplete_line_buffer_size: "1MB" # This option is only used when aggregator agent is in place. # When checked, incoming metrics (incoming lines, incoming bytes) are sent from processors as is. # When set false, incoming metrics are sent from aggregator agent and following metric columns will be dropped by default # docker_id, pod_name, pod_id, labels.*, annotations.* # Default is false. granular_incoming_metrics_enabled: false # metric_column_opts defines options for metric columns. Currently only column dropping is supported. metric_column_opts: # drop_columns defines metric columns that will not be sent to metric destinations from internal pusher. # This can be used to reduce high cardinality issues. Supports prefix match option with * as terminating character. drop_columns: - name: docker_id - name: labels.* # exceptions is used to fine tune dropped columns. # If a column matches any values given in this option, it will not be dropped. # Supports prefix match option with * as terminating character. 
exceptions: - labels.app.value - labels.somefield.* # data_preferences settings can be used to override the org's data preferences for the agent running this config data_preferences: metrics: true edac: true alert: true event: true health: true cluster_pattern: true cluster_sample: true log: true heartbeat: true archive: true agent_self_log: true
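# Illustrative effect of the metric_column_opts drop_columns/exceptions settings above (a sketch of the
# expected behavior; column names marked hypothetical are not from this config):
#   - docker_id           -> dropped (exact match in drop_columns)
#   - labels.team.value   -> dropped (matches the labels.* prefix; hypothetical column name)
#   - labels.app.value    -> kept (listed as an exact exception)
#   - labels.somefield.x  -> kept (matches the labels.somefield.* exception prefix; hypothetical column name)
#   - pod_name            -> kept (not listed in drop_columns)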