# Config v3 sample proposal
# Ordering can be restructured later. Links and nodes will be at the top for now.
# This sample, as of 1/5/2023, takes the graph flow from the following link:
# https://lucid.app/lucidchart/7a81d31b-182b-4899-9ed8-a73a45ffd6e3/edit?viewport_loc=216%2C-141%2C2048%2C1043%2C0_0&invitationId=inv_921749ea-9a65-48c4-904d-00272fe05727

# All links are defined in this section.
links:
  - from: my_file_input
    to: mask_password
  - from: my_file_input
    to: ed_debug_output
  - from: another_file_input
    to: mask_password
  - from: my_tcp_input
    to: mask_password
  - from: my_udp_input
    to: mask_password
  - from: my_http_input
    to: mask_password
  - from: my_https_input
    to: mask_password
  - from: my_container_input
    to: my_s3
  - from: mask_password
    to: extract_msg_field
  - from: extract_msg_field
    to: mask_credit_card
  - from: extract_msg_field
    to: log_to_metric
  - from: mask_credit_card
    to: example_router
  - from: extract_msg_field
    to: log_to_pattern
  - from: log_to_metric
    to: CN_A_team->CN_A_team_input
  - from: CN_A_team->CN_A_team_output
    to: my_elastic
  - from: example_router
    path: ns=edgedelta
    to: my_s3
  - from: example_router
    path: pre_elastic
    to: my_elastic
  - from: log_to_metric
    to: my_elastic
  - from: log_to_pattern
    to: my_elastic
  - from: ed_agent_stats
    to: ed_metrics
  - from: ed_system_stats
    to: ed_metrics
  - from: ed_container_stats
    to: ed_metrics
  - from: ed_pipeline_io_stats
    to: ed_metrics
  - from: ed_component_health
    to: ed_health
  - from: ed_node_health
    to: ed_health

nodes:
  ### Inputs ###
  - name: my_file_input
    type: file_input
    path: "path/to/my_logs/logs.txt"
    # Detects line patterns automatically based on the Ragel FSM-based lexical recognition process.
    # No need to specify line_pattern explicitly.
    auto_detect_line_pattern: true
    # Stacktrace detector only runs in auto line detection mode.
    boost_stacktrace_detection: true
    enable_persisting_cursor: true
    # With this flag, all files are handled individually.
    separate_source: true
    # Ingest timestamp if the input is in JSON format.
    add_ingestion_time: true
    # Skip the ingestion time when the input is broken or in an invalid format.
    skip_ingestion_time_on_failure: true
    # This is a list of regexes run against the glob path's captures; matching files are discarded.
    exclude:
      - "/etc/systemd/system/billingservice/test.log"
      - "/etc/systemd/system/billingservice/dev.log"
    user_description: |
      A user-defined description of the Node. Users may add any additional comments describing
      the function of their node. This is useful for keeping notes in the config file, since YAML
      comment lines ("#") are not persisted due to the automated marshal/unmarshal of YAML.
  - name: another_file_input
    type: file_input
    path: "/etc/systemd/system/**/*.log"
    # Line pattern helps the system recognize lines.
    # If your lines do not start with a commonly known pattern then provide a value for a better
    # experience. In this case the logs are JSON-formatted and it is desired to preserve the
    # timestamp of the log instead of when it was ingested.
    line_pattern: '^{"@timestamp":"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'
    # Stacktrace detector only runs in auto line detection mode.
    boost_stacktrace_detection: true
    enable_persisting_cursor: true
    # With this flag, all files are handled individually.
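    # For example (illustrative): if the glob above matches several log files, each file is
    # tracked and reported as its own source rather than being merged into one source.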
    separate_source: true
    # This is a list of regexes run against the glob path's captures; matching files are discarded.
    exclude:
      - "/etc/systemd/system/billingservice/test.log"
      - "/etc/systemd/system/billingservice/dev.log"
  - name: my_container_input
    type: file_input
    path: "path/to/my_container_logs/logs.json"
    # Field 'log' is extracted from the JSON logs of the container(s).
    docker_mode: true
    # Detects line patterns automatically based on the Ragel FSM-based lexical recognition process.
    # No need to specify line_pattern explicitly.
    auto_detect_line_pattern: true
    # Stacktrace detector only runs in auto line detection mode.
    boost_stacktrace_detection: true
  - name: my_k8s_input
    type: kubernetes_input
    include:
      - "k8s.pod.name=^apache.*$,k8s.namespace.name=.*web*"
    exclude:
      - "k8s.namespace.name=^kube-nginx$"
      - "k8s.pod.name=.*nginx*,k8s.container.name=testing"
    # If a "line_pattern" regex rule is specified in the agent config, the agent splits lines
    # using this specific rule instead of the newline character ("\n").
    line_pattern: '^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'
    preserve_original_timestamp: true
    enable_persisting_cursor: true
  - name: fast_demo
    type: demo_input
    events_per_sec: 1
    log_type: "apache_common"
    # Supported log types: "apache_combined_simple", "apache_common", "apache_common_json",
    # "apache_combined", "apache_combined_json", "apache_error", "rfc3164", "rfc5424",
    # "common_log", "infoblox_dns_event", "infoblox_dns_response", "api_log", "nginx_log",
    # "redis_log", "clickhouse_log", "json_log", "core_dns_log"
  - name: welcomes_script
    type: exec_input
    run_interval: 10s
    command: "/bin/sh -c"
    script: |
      for i in {1..50}
      do
        echo "Welcome $i times"
      done
  - name: my_tcp_input
    type: tcp_input
    port: 3421
    read_timeout: 10s
    read_size: 10
  - name: otlp
    type: otlp_input
    port: 8585
    data_type: "log" # "log" or "metric", only one should be selected
  - name: my_udp_input
    type: udp_input
    port: 3421
    read_timeout: 10s
  - name: my_http_input
    type: http_input
    port: 8080
  - name: my_https_input
    type: http_input
    port: 8080
    tls:
      ca_file: /var/run/secrets/secure-forward/ca-bundle.crt
      crt_file: /var/run/secrets/secure-forward/tls.crt
      key_file: /var/run/secrets/secure-forward/tls.key
      ignore_certificate_check: true
  - name: my_fluentd_input
    type: fluentd_input
    port: 9898
    read_timeout: 60s
    shared_key: my_shared_key
    tls:
      ca_file: /var/run/secrets/secure-forward/ca-bundle.crt
      crt_file: /var/run/secrets/secure-forward/tls.crt
      key_file: /var/run/secrets/secure-forward/tls.key
      ignore_certificate_check: true
  - name: my_kafka_input
    type: kafka_input
    endpoint: "something"
    # Kafka topic to listen to.
    topic: "topic"
    group_id: "my-group"
  # k8s_traffic_input can be used to collect pod traffic metrics.
  # The agent uses its tracer package to listen to communications and produces metrics based on them.
  # The dimensions for these metrics are protocol, role, URI, method, response_code, client_namespace, client_controller, server_namespace, server_controller.
  # Metrics generated will be "ed_k8s_traffic_latency", "ed_k8s_traffic_communication", "ed_k8s_traffic_in_throughput" and "ed_k8s_traffic_out_throughput".
  # "ed_k8s_traffic_latency" will only be generated for HTTP traffic.
  - name: ed_k8s_traffic
    type: k8s_traffic_input
    include:
      - "k8s.namespace.name=default"
    exclude:
      - "k8s.namespace.name=^kube-system$"
    # Buffer size to be allocated for the eBPF program.
    # It should be divisible by the page size of the machine the agent is running on, without any remainder.
    # Default is 32KiB (32 * 1024 bytes).
    buffer_size: 32KiB
  # k8s_event_input can be used to collect k8s events from the cluster.
  # The agent uses leader election to select one of the agents, and that agent will collect all of the events.
  # Events will have an 'item.type=event' attribute key-value pair and are searchable under log search if connected to an "ed_archive_output" node.
  - name: ed_k8s_events
    type: k8s_event_input
    report_interval: 1m
  # ED Agent Stats node captures the stats of the agent.
  - name: ed_agent_stats
    type: ed_agent_stats_input
    report_interval: 30s
  # ED System Stats node captures the stats of the system in which the agent is running.
  - name: ed_system_stats
    type: ed_system_stats_input
    report_interval: 1m
  # ED Container Stats node captures the stats of the container in which the agent is running.
  - name: ed_container_stats
    type: ed_container_stats_input
    report_interval: 2m
  # ED Pipeline IO Stats node receives incoming metrics from sources
  # and outgoing metrics from pushers. It emits these metrics
  # periodically, and by default there is a link from this node to the
  # ED Metrics node, which powers various features in the ED app. This
  # node can be linked to any destination that supports the metric item
  # type.
  # *NOTE*: It's not recommended to remove the link between ED
  # Pipeline IO Stats and ED Metrics.
  - name: ed_pipeline_io_stats
    type: ed_pipeline_io_stats_input
  # ED K8s metrics input node receives the metrics data of the k8s cluster.
  - name: ed_k8s_metrics
    type: ed_k8s_metrics_input
    # [Optional] Exclude some of the metric types from the collection.
    # Default collected metric types: [cadvisor, node_exporter, kubelet, kube_state_metrics]
    exclude:
      - node_exporter
    # Interval at which the Edge Delta agent scrapes metric endpoints. Default value is 1m.
    scrape_interval: 1m
  # ED Component Health node receives the health data of each of the components in the graph.
  - name: ed_component_health
    type: ed_component_health_input
  # ED Node Health node receives the health data of each of the nodes in the graph.
  - name: ed_node_health
    type: ed_node_health_input
  - name: example_router
    type: route
    paths:
      # [Required] Path name to distinguish when adding a link from the route node.
      - path: "pre_elastic"
        # [Required] Expression used to evaluate the log item. Default is CEL expression format.
        condition: regex_match(item["body"], "(?i)ERROR")
        # [Optional] When set to true: if the evaluation returns true, it will be the last evaluation done by the route node. Default is false.
        exit_if_matched: true
      - path: "ns=edgedelta"
        condition: item["resource"]["k8s.namespace.name"] == "edgedelta"

  ### Processors ###
  - name: filter_logs_with_regex
    type: regex_filter
    pattern: '{{ Env "PATTERN" }}'
    negate: true
  - name: filter_logs_with_regex_on_subpath
    type: regex_filter
    pattern: "us-(west|east)-2"
    # Dot-separated field path to match the given pattern against. Dots (.) inside a field name should be escaped with a backslash (\).
    field_path: environment
  - name: mask_password
    type: mask
    pattern: 'password:\s*(?P<pw>\w+)'
    capture_group_masks:
      - capture_group: pw
        mask: "******"
  - name: extract_msg_field
    type: extract_json_field
    # Field path is a dot-separated path of the field (i.e. "log.message").
    # Its value will be extracted and the original JSON content will be discarded.
    field_path: "message"
    # keep_log_if_failed is useful when the processor handles mixed JSON and non-JSON logs and the non-JSON logs should be kept and ingested as-is.
    keep_log_if_failed: true
  - name: extract_first_data
    type: extract_json_field
    # Field path supports index-based extraction as well.
    field_path: "records[0].data"
  - name: extract_all_data
    type: extract_json_field
    # Field path supports fetching all indexes using [*]. In that case all items of the array will be ingested into the agent as separate logs.
    field_path: "records[*].data"
  - name: mask_credit_card
    type: mask
    predefined_pattern: credit_card
  # The unescape node unescapes characters from a string. This is useful when working with compressed strings that have escape characters in them,
  # or when dealing with escaped JSON.
  # process_field is an optional parameter; if it is not defined, the body field of the payload will be unescaped.
  # field_path is an optional parameter defining the location the unescaped value will be upserted to. If field_path is not defined, the unescaped value will be upserted into its original location.
  - name: unescape_characters
    type: unescape_json
    process_field: item["body"]
    field_path: item.attributes.unescaped
  - name: log_to_metric
    type: log_to_metric
    # Pattern to match against the incoming log item; it matches the item's "body" field.
    # The given regular expression may or may not include capture groups.
    # If one or more dimension groups are defined, then there should be at least one
    # capture group definition.
    pattern: 'error|ERROR|err|ERR service: (?P<service>\w+) duration: (?P<duration>\d+)ms'
    # Interval defines the time duration in which this processor flushes metric items.
    # Can also be defined at dimension group level.
    # Its default value is 1m.
    interval: 1m
    # Retention is used for anomaly stats to have a lookback period for previous items.
    # Anything older than the given period will be discarded.
    # Can also be defined at dimension group level.
    # Its default value is 3h.
    retention: 3h
    # Metric name will be used to override the metric name that will be generated via this processor.
    # If not given, the processor name will be used as the metric name.
    metric_name: "error"
    # By default, metrics with dimension groups have dimension names and values inside the metric name.
    # By setting dimensions_as_attributes: true, these dimensions will be added to the metric item as attributes.
    # Can also be defined at dimension group level.
    dimensions_as_attributes: true
    # When this value is set to true, this processor will expose its metrics in Prometheus format when rule_metrics_prom_stats_enabled: true.
    # Can also be defined at dimension group level.
    enable_reporting_in_prometheus: true
    # Stats to be reported via this processor.
    # Valid options are: "count", "sum", "avg", "min", "max", "p25", "p75", "p95", "p99", "stddev", "anomaly1", "anomaly2", "anomalymin".
    # Special stat types:
    #   anomalymin: takes the min of anomaly1 and anomaly2. Useful to reduce alert noise.
    # Can also be defined at dimension group level.
    # Defaults:
    #   count, anomaly1 and anomaly2 metrics are generated for occurrence captures.
    #   count, min, max, avg, anomaly1 and anomaly2 metrics are generated for numeric captures.
    enabled_stats: ["count", "sum", "anomaly1"]
    # Value adjustment rules define how to change the value of any numeric capture group.
    # While the processor runs, the obtained value is referenced via the 'value' variable.
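    # Illustrative example (hypothetical numbers): with the rule below, a log captured with
    # duration=150 would be reported with a metric value of 350 (150 + 200.0).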
    value_adjustment_rules:
      - numeric_dimension: duration
        expression: "value + 200.0"
    # Group by defines how to aggregate log items based on their properties.
    # Each entry should be an expression (CEL or Go template).
    # When group_by is unset, metrics are grouped by their source (i.e. this would be item["resource"]["__group_name"]).
    group_by:
      - item["resource"]["ed.filepath"]
    # Histogram limit defines the maximum number of possible histograms to be created per dimension group
    # or per processor (if no dimension group is defined). Its default is 1000.
    histogram_limit: 5000
    # Dimension groups are used for grouping up attributes for metrics.
    # There can be one or more dimension groups.
    dimension_groups:
      - dimensions: ["service"]
        # The field is the dimension and the value of the dimension is the value of the field.
        field_dimensions:
          [
            'item["attributes"]["extracted_field"]',
            'item["attributes"]["extracted_field_two"]',
          ]
        numeric_dimension: "duration"
        # Custom suffix will be added to the metric name like <metric_name>_<custom_suffix>.<stat_type>.
        custom_suffix: "by_duration"
        # If a numeric dimension is defined, the value adjustment rule will be used primarily.
        value_adjustment_rule: "value * 5.0"
      - dimensions: ["service"]
        # Only numeric_dimension or field_numeric_dimension can be defined, not both.
        # The dimension will be the field and the numeric capture will be the value of the field.
        field_numeric_dimension: item["attributes"]["numeric_field"]

  # ---------------- Dimension Counter Node Examples ----------------
  # If named captures in the regex pattern are dimensions and dimension groups are given, then occurrence stats will be generated.
  # In this example an occurrence count for each HTTP method will be generated:
  # http_method_get.count, http_method_post.count...
  - name: http
    type: log_to_metric
    # If you copy-paste it into the node testing UI, please copy the string between single quotes.
    pattern: '] "(?P<method>\w+)'
    enabled_stats: ["count", "anomalymin"]
    dimension_groups:
      - dimensions: ["method"]
  # This is another dimension counter with dimensions_as_attributes: true.
  # The metrics generated from this node have the same name but different attribute values. The dimension key/value is sent as an attribute.
  # Sample generated metrics are the following:
  # http_single.count 1 {method="get"}
  # http_single.anomaly1 25 {method="get"}
  # http_single.count 1 {method="post"}
  # http_single.anomaly1 25 {method="post"}
  # http_single.count 2 {httpversion="1.1"}
  # http_single.anomaly1 25 {httpversion="1.1"}
  # http_single.count 2 {code="200"}
  # http_single.anomaly1 25 {code="200"}
  - name: "http_single"
    type: log_to_metric
    # If you copy-paste it into the node testing UI, please copy the string between single quotes.
    pattern: '] "(?P<method>\w+) (?P<path>\S*) (?P<httpversion>\S*)" (?P<code>\d+)'
    dimensions_as_attributes: true
    dimension_groups:
      - dimensions: ["method"]
      - dimensions: ["httpversion"]
      - dimensions: ["code"]
  # An example of a dimension counter with the use of dimension_groups to group up attributes for metrics.
  # custom_suffix can be used to customize the metric name generated from a dimensions_group.
  # There can be a single or multiple dimension groups.
  # Sample generated metrics are the following:
  # http_group.count 1 {method="get", code="200"}
  # http_group.anomaly1 25 {method="get", code="200"}
  # http_group.count 1 {method="post", code="200"}
  # http_group.anomaly1 25 {method="post", code="200"}
  # http_group_by_version.count 1 {method="get", httpversion="1.1"}
  # http_group_by_version.anomaly1 25 {method="get", httpversion="1.1"}
  # http_group_by_version.count 1 {method="post", httpversion="1.1"}
  # http_group_by_version.anomaly1 25 {method="post", httpversion="1.1"}
  - name: http_group
    type: log_to_metric
    # If you copy-paste it into the node testing UI, please copy the string between single quotes.
    pattern: '] "(?P<method>\w+) (?P<httpversion>\S*)" (?P<code>\d+)'
    dimensions_as_attributes: true
    dimension_groups:
      - dimensions: ["method", "code"]
      - dimensions: ["method", "httpversion"]
        custom_suffix: "by_version"
  # An example of a dimension numeric capture with the use of dimension_groups to group up attributes for metrics.
  # The numeric dimension can be specified in order to capture numeric data from the logs.
  # Sample log: 2022-08-20 08:21:14.288134 response=201 loglevel=INFO ostype=Unix service=one-packaging-ui source=syslog-test duration=41 svcTime=59128524
  # Sample metric generated from the following processor:
  # apidata_duration.avg 41 {"service":"one-packaging-ui source=syslog-test", "ostype":"Unix"}
  # With custom_suffix being used, the metric will look like the following:
  # apidata_duration_test_suffix.avg 41 {"service":"one-packaging-ui source=syslog-test", "ostype":"Unix"}
  - name: apidata
    type: log_to_metric
    pattern: ostype=(?P<ostype>\w+).+?service=(?P<service>.+?)\sduration=(?P<duration>\d+)
    dimensions_as_attributes: true
    enabled_stats: ["avg"]
    dimension_groups:
      - dimensions: ["service", "ostype"]
        numeric_dimension: "duration"
        custom_suffix: "test_suffix"

  # ---------------- Dimension Numeric Capture Node Example ----------------
  # If both dimension and numeric captures are defined in the regex pattern and also in one of the dimension group(s), then numeric stats per dimension per numeric value will be generated.
  # In this example numeric stats for each HTTP method will be generated, such as:
  # http_request_method_get_latency.[avg|min|max|p95|p99|sum], http_request_method_post_latency.[avg|min|max|p95|p99|sum]
  - name: "http_request"
    type: log_to_metric
    # If you copy-paste it into the node testing UI, please copy the string between single quotes.
    pattern: '] "(?P<method>\w+) took (?P<latency>\d+) ms'
    interval: 1m
    retention: 1h
    skip_empty_intervals: true
    dimension_groups:
      - dimensions: ["method"]
        numeric_dimension: "latency"
  - name: log_to_pattern
    type: log_to_pattern
    num_of_clusters: 15
    samples_per_cluster: 1
    reporting_frequency: 3m
    retire_period: 10m
    throttle_limit_per_sec: 200
    group_by:
      - item["resource"]["src_type"]

  # ---------------- Threshold Node Example ----------------
  # This is an example of a threshold node. The input of the node is a metric item (generated from a log_to_metric node) and the output is a signal.
  # The condition is evaluated for each metric item and if the condition is true, then a signal is generated.
  # Syntax for the "filter" and "condition" fields follows the syntax of the Common Expression Language (CEL). More info about it can be found here: https://github.com/google/cel-spec/blob/master/doc/langdef.md
  # The filter expression has to refer to the metric item fields using the term "item". For example, item["_name"] refers to the metric item name and item["_value"] refers to the metric item value.
  # The filter expression can also refer to other metric item attributes.
  # For example, item["resource"]["k8s.namespace.name"] refers to the k8s namespace of the metric item.
  # The condition expression has to refer to the metric item value using the term "value".
  # The "consecutive" field is optional; it specifies the number of consecutive times that the condition has to be true for the signal to be generated.
  - name: example_threshold
    type: threshold
    filter: item["_name"] == "error.count" && (item["resource"]["k8s.namespace.name"] == "edgedelta" || item["resource"]["k8s.namespace.name"] == "default")
    condition: value > 10
    consecutive: 5
  - name: capture_group_log_level_detection
    type: log_level_detector
    # If you copy-paste it into the node testing UI, please copy the string between single quotes.
    pattern: ' level=(\w+) '
    fallback_value: "unknown"
  - name: enrichment
    type: enrich
    field_mappings:
      - field_name: pod_id
        value: regex_capture(item["resource"]["ed.filepath"], "/var/logs/(?P<id>.+)/.*")["id"]
      - field_name: deployment_name
        value: from_k8s(item["pod_id"], "k8s.deployment.name")
      - field_name: namespace
        value: from_k8s(item["pod_id"], "k8s.namespace.name")
      - field_name: instance_id
        value: ec2_metadata("instance-id")
      - field_name: cluster
        value: first_non_empty([env("UNDEFINED_CLUSTER"), env("CLUSTER"), "default-cluster"])
      - field_name: instance_name
        value: gcp_metadata("instance.name")
      - field_name: app_host
        value: item["resource"]["host.name"]
      - field_name: tag
        value: |
          % gotemplate
          {{ .item.resource.ed.tag }}
        override_if_exists: true
      - field_name: service_name
        value: json(item["body"]).messages[0].service
  - name: split_with_comma
    type: split_with_delimiter
    delimiter: ","
  - name: decode_base64
    type: base64_decode
  - name: decompress_gzip
    type: gzip_decompress
  - name: resource_transform
    type: resource_transform
    target_source_type: k8s
    # Possible source types: "docker", "ecs", "file", "k8s" and "custom"
    # (for custom, one can define as many source field overrides as desired, provided that at
    # least one source field override is defined).
    # Possible mappings for each source type:
    # docker:
    #   - container.name, container.image.name (Mandatory)
    #   - container.image.tag (Optional) (Not defining that can cause loss of information)
    # ecs:
    #   - aws.ecs.container.arn (Mandatory)
    #   - aws.ecs.cluster.arn, aws.ecs.task.family, aws.ecs.task.revision (Optional) (Not defining them can cause loss of information)
    # file:
    #   - ed.filepath (Mandatory)
    # k8s:
    #   - k8s.namespace.name, k8s.pod.name, k8s.container.name (Mandatory)
    #   - k8s.deployment.name, k8s.statefulset.name, k8s.daemonset.name, k8s.replicaset.name, k8s.cronjob.name, k8s.job.name (Optional) (Not defining them can cause loss of information)
    # Each value should be an expression (CEL or Go template).
    source_field_overrides:
      - field: k8s.container.name
        expression: json(item["body"]).kubernetes.container.name
      - field: k8s.namespace.name
        expression: json(item["body"]).kubernetes.namespace
      - field: k8s.pod.name
        expression: '"-"' # Instead of defining a path, one can simply use "-" to skip a field to decrease cardinality

  # ---------------- TopK Node Example ----------------
  - name: top_api_requests
    type: top_k
    pattern: (?P<ip>\d+\.\d+\.\d+\.\d+) - \w+ \[.*\] "(?P<method>\w+) (?P<path>.+) HTTP\/\d.0" (?P<code>.+) \d+
    k: 10
    interval: 30s
    lower_limit: 5
    separator: ","
    group_by:
      - item["resource"]["src_type"]

  # ---------------- OpenTSDB metric Processor Example ----------------
  # This processor is used to generate metrics from OpenTSDB metric data.
  # Supported metric data format is "put <metric_name> <timestamp> <value> <tagk1=tagv1 tagk2=tagv2 ...>"
  # Tags are optional. This processor outputs metrics as-is.
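  # Illustrative input line (standard OpenTSDB put syntax, hypothetical values):
  # put sys.cpu.user 1356998400 42.5 host=web01 cpu=0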
  - name: otsdb_metric
    type: opentsdb_parse
  - name: request_error_ratio
    # Calculates the ratio with the following formula: failure / (failure + success)
    type: ratio
    success_pattern: "request succeeded"
    failure_pattern: "request failed"
    # Interval defines the time duration in which this processor flushes metric items.
    # Can also be defined at dimension group level.
    # Its default value is 1m.
    interval: 1m
    # Retention is used for anomaly stats to have a lookback period for previous items.
    # Anything older than the given period will be discarded.
    # Can also be defined at dimension group level.
    # Its default value is 3h.
    retention: 3h
    # Metric name will be used to override the metric name that will be generated via this processor.
    # If not given, the processor name will be used as the metric name.
    metric_name: "request_ratio"
    # When this value is set to true, this processor will expose its metrics in Prometheus format when rule_metrics_prom_stats_enabled: true.
    # Can also be defined at dimension group level.
    enable_reporting_in_prometheus: true
    # Group by defines how to aggregate log items based on their properties.
    # Each entry should be an expression (CEL or Go template).
    # When group_by is unset, metrics are grouped by their source (i.e. this would be item["resource"]["__group_name"], or item["resource"]["__logical_source"] if an aggregator is in place).
    group_by:
      - item["resource"]["ed.filepath"]
    # Histogram limit defines the maximum number of possible histograms to be created for this processor. Its default is 1000.
    histogram_limit: 5000

  # Log transforms apply operations to specified fields within logs. field_path is a dot-notated field path
  # that doesn't use the item prefix. field_path references to fields with dots in them can be escaped with /.
  # "body" and "resource" fields are protected and can't be edited. Edits to those fields will return a validation error.
  # Upserts allow CEL expressions for the value. Invalid CEL expressions will result in an empty string for that field. Valid expressions that result in null will be added to the item as empty strings.
  # Operations are applied in the order they are defined.
  # Some example operations are: upsert, delete
  - name: log_transform_msg_as_raw
    type: log_transform
    transformations:
      - field_path: "attributes.host/.name"
        operation: "upsert"
        value: item["_parsed"]["msg"]
      - field_path: "role"
        operation: "delete"
  # Output transforms apply operations to specified fields within logs. field_path is a dot-notated field path
  # that does not use the item prefix. field_path references to fields with dots in them can be escaped with /.
  # Upserts allow CEL expressions for the value. Invalid CEL expressions will result in an empty string for that field. Valid expressions that result in null will be added to the item as empty strings.
  # Operations are applied in the order they are defined.
  # Some example operations are: upsert, delete
  - name: output_transform
    type: output_transform
    transformations:
      - field_path: "_ed"
        operation: "upsert"
        value: item["attributes"]
      - field_path: "raw_updated.[0]"
        operation: "upsert"
        value: item["_parsed"]["msg"]
      - field_path: "raw"
        operation: "delete"

  # splunk_mapper node allows users to specify which fields they want to fill in for each Splunk payload field using a CEL expression.
  # The field names match those of the Splunk HEC event endpoint, prepended with splunk_.
  # splunk_event accepts a map, string or byte array; splunk_fields accepts maps; splunk_time accepts int64 (unix milli) or string (ISO 8601 date string).
  # All other fields accept strings.
  # splunk_event is required.
  - name: splunk_mapping_node
    type: splunk_mapper
    splunk_source_type: item["resource"]["src_type"]
    splunk_source: item["resource"]["__src_name"]
    splunk_host: item["resource"]["host.name"]
    splunk_index: '"prod_index"'
    splunk_event: regex_capture(item["body"], "kubernetes.container.image=(?P<field>[\\w-]+)").field
    splunk_fields: item["attributes"]
    splunk_time: item["timestamp"]
  # datadog_mapper node allows users to specify which fields they want to fill in for each Datadog payload field using a CEL expression.
  # The field names match those of the Datadog log endpoint, prepended with dd_.
  # dd_message is required.
  # dd_tags accepts a map or a key-value string ("key1:value1,key2:value2"); dd_message accepts a string, map, or byte array.
  # All other fields accept a string.
  - name: datadog_mapping_node
    type: datadog_mapper
    dd_message: item["body"]
    dd_tags: item["attributes"]
    dd_source: item["resource"]["src_type"]
    dd_source_name: item["resource"]["__src_name"]
    dd_host: json(item["_parsed"]).host
    dd_hostname: json(item["_parsed"]).hostname
    dd_service: '"K8s"'
    dd_level: env("LEVEL")
    dd_datatype: item["datatype"]
  # parse_json_attributes node can be used to parse fully structured logs as attributes for log items. This will be a nested object that will be added under the attributes.
  # The field to parse can be specified. Default is item["body"].
  # field_path can be used to specify the location to upsert the parsed object into. Default is item["attributes"].
  - name: parse_json
    type: parse_json_attributes
    process_field: item["body"]
    field_path: item["attributes"]["event"]
  # compound node holds nodes and links.
  # A compound node can only have one input node, and its type is compound_input.
  # A compound node can have many output nodes, and their type is compound_output.
  - name: CN_A_team
    type: compound
    nodes:
      # There can only be 1 input node, and its type is compound_input.
      - name: CN_A_team_input
        type: compound_input
      # There can be many output nodes. Each can be named to be used like paths.
      - name: CN_A_team_output
        type: compound_output
    links:
      - from: CN_A_team_input
        to: CN_A_team_output

  ### Outputs ###
  - name: my_s3
    type: s3_output
    aws_key_id: abc
    aws_sec_key: xyz
    bucket: testbucket
    region: us-east-2
  - name: my_elastic
    type: elastic_output
    index: "index name"
    user: elastic
    password: '{{Env "ELASTIC_PASS"}}'
    address:
      - elasticnode1
    tls:
      ignore_certificate_check: true
  - name: sumo_us
    type: sumologic_output
    endpoint: "https://endpoint4.collection.us2.sumologic.com/receiver/v1/http/XYZ"
  - name: sumo_us_legacy
    type: sumologic_output
    endpoint: "https://endpoint4.collection.us2.sumologic.com/receiver/v1/http/XYZ"
    # This field is used by output nodes to support the agent v0 push format instead of the agent v1 push format
    # for backwards compatibility reasons. When this field is used, the item format should be assumed to have
    # a 1-1 correspondence with the v0 format.
    use_legacy_formatting: true
    # Custom tags are only honored when "use_legacy_formatting: true".
    custom_tags:
      - name: "app"
        value: "transaction_manager"
      - name: "region"
        value: "us-west-2"
      - name: "File Path"
        value: "{{.FileGlobPath}}"
      - name: "K8s PodName"
        value: "{{.K8sPodName}}"
      - name: "K8s Namespace"
        value: "{{.K8sNamespace}}"
      - name: "K8s ControllerKind"
        value: "{{.K8sControllerKind}}"
      - name: "K8s ContainerName"
        value: "{{.K8sContainerName}}"
      - name: "K8s ContainerImage"
        value: "{{.K8sContainerImage}}"
      - name: "K8s ControllerLogicalName"
        value: "{{.K8sControllerLogicalName}}"
      - name: "ECSCluster"
        value: "{{.ECSCluster}}"
      - name: "ECSContainerName"
        value: "{{.ECSContainerName}}"
      - name: "ECSTaskVersion"
        value: "{{.ECSTaskVersion}}"
      - name: "ECSTaskFamily"
        value: "{{.ECSTaskFamily}}"
      - name: "DockerContainerName"
        value: "{{.DockerContainerName}}"
      - name: "ConfigID"
        value: "{{.ConfigID}}"
      # One can use either curly braces for templating (which only supports direct variable replacement and the index function)
      # or square brackets, where the full power of Go's template language can be employed.
      - name: "Host"
        value: "[[ .Host ]]"
      - name: "Source"
        value: "[[ .Source ]]"
      - name: "SourceType"
        value: "[[ .SourceType ]]"
      - name: "Tag"
        value: "[[ .Tag ]]"
  - name: openmetrics
    type: openmetrics_output
    endpoint: "http://localhost:8428/metrics"
    features: metric
  - name: openmetrics_legacy
    type: openmetrics_output
    endpoint: "http://localhost:8428/metrics"
    features: metric
    use_legacy_formatting: true
    custom_tags:
      - name: "app"
        value: "test"
      - name: "region"
        value: "us-west-2"
      - name: "File Path"
        value: "{{.FileGlobPath}}"
      - name: "K8s PodName"
        value: "{{.K8sPodName}}"
      - name: "K8s Namespace"
        value: "{{.K8sNamespace}}"
      - name: "K8s ControllerKind"
        value: "{{.K8sControllerKind}}"
      - name: "K8s ContainerName"
        value: "{{.K8sContainerName}}"
      - name: "K8s ContainerImage"
        value: "{{.K8sContainerImage}}"
      - name: "K8s ControllerLogicalName"
        value: "{{.K8sControllerLogicalName}}"
      - name: "ECSCluster"
        value: "{{.ECSCluster}}"
      - name: "ECSContainerName"
        value: "{{.ECSContainerName}}"
      - name: "ECSTaskVersion"
        value: "{{.ECSTaskVersion}}"
      - name: "ECSTaskFamily"
        value: "{{.ECSTaskFamily}}"
      - name: "DockerContainerName"
        value: "{{.DockerContainerName}}"
      - name: "ConfigID"
        value: "{{.ConfigID}}"
      - name: "Host"
        value: "{{.Host}}"
      - name: "Source"
        value: "{{.Source}}"
      - name: "SourceType"
        value: "{{.SourceType}}"
      - name: "Tag"
        value: "{{.Tag}}"
  - name: my_s3_archiver
    type: s3_output
    aws_key_id: abc
    aws_sec_key: xyz
    bucket: testbucket
    region: us-east-2
    # This flag disables metadata file ingestion (which is used for rehydration analysis). It is useful for AWS Athena, where only data files should be present in the given S3 bucket + path prefix.
    disable_metadata_ingestion: true
    # Default path prefix is <Year>/<Month>/<Day>/<Hour>/<Minute>/; with this field it can be overridden.
    path_prefix:
      # One can refer to "Year", "Month", "Day", "Hour", "<N> Minute" (e.g. "5 Minute"), "tag", "host", "OtherTags.<tag name>" and "LogFields.<field name>".
      # For ECS, "ecs_cluster", "ecs_container_name", "ecs_task_family" and "ecs_task_version" are available.
      # For K8s, "k8s_namespace", "k8s_controller_kind", "k8s_controller_logical_name", "k8s_pod_name", "k8s_container_name" and "k8s_container_image" are available.
      # For Docker, "docker_container_name" and "docker_image_name" are available.
      order:
        - Year
        - Month
        - Day
        - Hour
        - 5 Minute
        - OtherTags.role
      # Format should have exactly the same number of "%s" placeholders as the count of "order" entries; templating will be done in the same order given via the "order" field.
      # Curly braces are prohibited.
      # Also, note that this format is not supported in rehydrations, so the source for a rehydration cannot be an integration using a custom path_prefix format.
      # This should be used for some Big Data applications such as BigQuery, AWS Athena, etc.
      format: year=%s/month=%s/day=%s/hour=%s/minute=%s/role=%s/
  - name: my_s3_with_different_flush_settings
    type: s3_output
    aws_key_id: abc
    aws_sec_key: xyz
    bucket: testbucket
    region: us-east-2
    flush_interval: 10m
    max_byte_limit: "8MB"
  - name: splunk_output
    type: splunk_output
    endpoint: https://localhost:443/services/collector/event
    token: my_api_token
    index: main
  - name: datadog_output
    type: datadog_output
    api_key: my_api_key
  - name: datadog_output_endpoints
    type: datadog_output
    api_key: my_api_key
    log_endpoint: https://http-intake.logs.datadoghq.com/v1/input
    metric_endpoint: https://api.datadoghq.com/api/v1/series
    event_endpoint: https://api.datadoghq.com/api/v1/events
  - name: datadog_output_single_endpoint
    type: datadog_output
    api_key: my_api_key
    log_endpoint: https://http-intake.logs.datadoghq.com/v1/input
  - name: my_gcs
    type: gcs_output
    hmac_access_key: my_hmac_access_key_123
    hmac_secret: my_hmac_secret_123
    bucket: ed-test-bucket
  - name: my_blob
    type: blob_output
    account_name: blob-account-name
    account_key: blobkey123!&
    container: testcontainer
    auto_create_container: false
  # Minio
  - name: my_minio
    type: minio_output
    access_key: my_access_key_123
    secret_key: my_secret_key_123
    endpoint: play.minio.com:9000
    bucket: ed-test-bucket-minio
    disable_ssl: true
    # Force the archive destination to use the {endpoint}/{bucket} format instead of {bucket}.{endpoint}/ when reaching buckets.
    s3_force_path_style: true
    encoding: parquet
    compression: zstd
    use_native_compression: true
  - name: my_minio_https
    type: minio_output
    access_key: my_access_key_123
    secret_key: my_secret_key_123
    endpoint: play.minio.com:9000
    bucket: ed-test-bucket-minio
    s3_force_path_style: true
    encoding: parquet
    compression: zstd
    use_native_compression: true
    tls:
      ignore_certificate_check: true
  # Digital Ocean Spaces
  - name: my_digitalocean_spaces
    type: digitalocean_spaces_output
    endpoint: nyc3.digitaloceanspaces.com
    bucket: ed-test-bucket-dos
    access_key: my_access_key_123
    secret_key: my_secret_key_123
  # IBM Object Storage
  - name: my_ibm_object_storage
    type: ibm_object_storage_output
    endpoint: s3-api.us-geo.objectstorage.softlayer.net
    bucket: ed-test-bucket-ibm
    access_key: my_access_key_123
    secret_key: my_secret_key_123
  # Zenko CloudServer
  - name: my_zenko_cloudserver
    type: zenko_output
    endpoint: https://XXXXXXXXXX.sandbox.zenko.io
    bucket: ed-test-bucket-zenko
    access_key: my_access_key_123
    secret_key: my_secret_key_123
  - name: local_archive
    type: localstorage_output
    mounted_path: "/test/path/i/can/write"
  # Slack
  - name: my_slack
    type: slack_output
    endpoint: https://hooks.slack.com/services/...
    suppression_window: 30m
  - name: my_legacy_slack
    type: slack_output
    endpoint: https://hooks.slack.com/services/...
    suppression_window: 30m
    use_legacy_formatting: true
    notify_content:
      title: "Anomaly Detected: {{.ProcessorDescription}}"
      disable_default_fields: true
      advanced_content: |
        {
          "blocks": [
            {
              "type": "section",
              "text": {
                "type": "mrkdwn",
                "text": "*Raw POST Anomaly Detected: [[ .ProcessorDescription ]]*"
              }
            },
            {
              "type": "section",
              "text": {
                "type": "mrkdwn",
                [[ if contains .MatchedTerm "error" ]]
                "text": "*MatchedTerm* [[ .MatchedTerm ]]\n*ConfigID* [[ .ConfigID ]]"
                [[ else ]]
                "text": "*ERROR MatchedTerm* [[ .MatchedTerm ]]\n*ConfigID* [[ .ConfigID ]]"
                [[ end ]]
              }
            }
          ]
        }
      custom_fields:
        "Dashboard": "https://app.edgedelta.com/investigation?edac={{.EDAC}}&timestamp={{.Timestamp}}"
        "Current Value": "{{.CurrentValue}}"
        "Threshold Value": "{{.ThresholdValue}}"
        "Custom Message": "{{.CurrentValue}} exceeds {{.ThresholdValue}}"
        "Built-in Threshold Description": "{{.ThresholdDescription}}"
        "Matched Term": "{{.MatchedTerm}}"
        "Threshold Type": "{{.ThresholdType}}"
        "File Path": "{{.FileGlobPath}}"
        "K8s PodName": "{{.K8sPodName}}"
        "K8s Namespace": "{{.K8sNamespace}}"
        "K8s ControllerKind": "{{.K8sControllerKind}}"
        "K8s ContainerName": "{{.K8sContainerName}}"
        "K8s ContainerImage": "{{.K8sContainerImage}}"
        "K8s ControllerLogicalName": "{{.K8sControllerLogicalName}}"
        "ECSCluster": "{{.ECSCluster}}"
        "ECSContainerName": "{{.ECSContainerName}}"
        "ECSTaskVersion": "{{.ECSTaskVersion}}"
        "ECSTaskFamily": "{{.ECSTaskFamily}}"
        "DockerContainerName": "{{.DockerContainerName}}"
        "SourceAttributes": "{{.SourceAttributes}}"
        "ConfigID": "{{.ConfigID}}"
        "EDAC": "{{.EDAC}}"
        "Epoch": "{{.Epoch}}"
        "Host": "{{.Host}}"
        "MetricName": "{{.MetricName}}"
        "Source": "{{.Source}}"
        "SourceType": "{{.SourceType}}"
        "Tag": "{{.Tag}}"
  - name: my_teams
    type: teams_output
    endpoint: https://outlook.office.com/webhook/...
    suppression_window: 30m
  - name: my_webhook
    type: webhook_output
    endpoint: https://my.webhook.com
    suppression_window: 30m
    headers:
      Content-Type: application/json
    payload: |
      {
        "type": "message",
        "attachments": [
          {
            "contentType": "application/vnd.microsoft.card.adaptive",
            "content": {
              "$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
              "type": "AdaptiveCard",
              "version": "1.2",
              "body": [
                {
                  "type": "ColumnSet",
                  "spacing": "large",
                  "columns": [
                    {
                      "width": "stretch",
                      "items": [
                        {
                          "type": "TextBlock",
                          "wrap": true,
                          "text": "**{{ .item._alert.title }}**"
                        },
                        {
                          "type": "TextBlock",
                          "wrap": true,
                          "text": "**Description**: {{ js .item._alert.description }}\n\r**Tag**: {{ .item.resource.ed.tag }}\n\r**Host**: {{ .item.resource.host.name }}\n\r**Signal ID**: {{ .item._alert.signal_id }}\n\r**Threshold filter**: {{ js .item._alert.threshold_filter }}\n\r**Threshold condition**: {{ .item._alert.threshold_condition }}\n\r**Metric name**: {{ .item._alert.name }}\n\r**Value**: {{ .item._alert.value }}"
                        }
                      ]
                    }
                  ]
                }
              ]
            }
          }
        ]
      }

  # port outputs
  - name: my_tcp_port
    type: tcp_output
    host: log_repo_host
    port: 23131
  - name: my_http_port
    type: http_output
    endpoint: http://localhost:4545/v0/collect
  # fluentd output
  - name: my_fluentd
    type: fluentd_output
    host: log-repo-host
    port: 23131
    pool_size: 10
    # tag_prefix: the agent settings tag value is appended to this prefix
    # and used as the fluentd forward tag (the payload itself will still have edgedelta_tag=agentsettings.tag).
    # tag_prefix is only used as the fluentd tag if the corresponding data doesn't have a tag defined in enrichments.
    tag_prefix: "tail.ed."
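    # Illustrative example (hypothetical values): with tag_prefix "tail.ed." and the agent settings
    # tag "prod" (as set under settings below), the fluentd forward tag would be "tail.ed.prod".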
  - name: my_kafka_output
    type: kafka_output
    endpoint: localhost:2888,localhost:3888 # brokers
    topic: example_kafka_topic
  - name: my_newrelic_output
    type: newrelic_output
    api_key: my_api_key
  - name: my_loki_output
    type: loki_output
    endpoint: http://localhost:3100/loki/api/v1/push
    user: 123456
    api_key: my_api_key
  # prometheus exporter output
  - name: my_prometheus_exporter
    type: prometheus_exporter_output
    port: 8087
    retire_period: 15m
    labels:
      - name: k8s_namespace
        path: item["resource"]["k8s.namespace.name"]
      - name: k8s_container_name
        path: item["resource"]["k8s.container.name"]
      - name: k8s_pod_name
        path: item["resource"]["k8s.pod.name"]
      - name: k8s_deployment_name
        path: item["resource"]["k8s.deployment.name"]
      - name: k8s_daemonset_name
        path: item["resource"]["k8s.daemonset.name"]
      - name: k8s_statefulset_name
        path: item["resource"]["k8s.statefulset.name"]
      - name: k8s_replicaset_name
        path: item["resource"]["k8s.replicaset.name"]
      - name: k8s_cronjob_name
        path: item["resource"]["k8s.cronjob.name"]
      - name: k8s_job_name
        path: item["resource"]["k8s.job.name"]
  - name: ed_patterns
    type: ed_patterns_output
  - name: ed_archive
    type: ed_archive_output
  - name: ed_metrics
    type: ed_metrics_output
  - name: ed_health
    type: ed_health_output
  - name: ed_debug_output
    type: ed_debug_output

# The rest of the first-level structures of the v2 config follow below (parent/child config, agent settings, version).
# This is a placeholder; these pieces should be revisited regarding how they should behave in config v3.
version: v3

settings:
  tag: prod
  log:
    level: debug
  # persisting_cursor_settings defines persisting cursor locations, which is suitable for environments that don't want to miss any data during the agent restart process.
  # path is the folder where the cursor file is created, and flush_interval is how often the in-memory state is saved to that file.
  persisting_cursor_settings:
    path: /var/edgedelta/pos
    file_name: cursor_file.json
    flush_interval: 1m
  # Defines the interval at which logs are flushed and sent to the archive destination.
  # Default value is 30m.
  archive_flush_interval: 5m
  # Defines the maximum bytes to buffer in memory until triggering an archive flush.
  # When either archive_flush_interval or archive_max_byte_limit is reached, the agent flushes the buffered raw logs to the configured archive destination(s).
  # Default byte size limit is 16MB.
  archive_max_byte_limit: "16MB"
  # Defines the frequency interval at which source discovery is invoked.
  # Default value is 5s.
  source_discovery_interval: 5s
  # When enabled, agent(s) will expose internal metrics (such as incoming lines, outgoing bytes) in Prometheus format on the /metrics endpoint.
  # Port can be specified with PROM_PORT. Default port is 8087 if not specified.
  internal_prom_stats_enabled: true
  # When enabled, agent(s) will expose rule metrics (metrics generated from regex processors) in Prometheus format on the /metrics endpoint.
  # Port can be specified with PROM_PORT. Default port is 8087 if not specified.
  rule_metrics_prom_stats_enabled: true
  # When anomaly_tolerance is non-zero, anomaly scores handle edge cases better when the std. dev. is too small.
  # Default is 0.01.
  # Can be set at node level and/or dimension group level for some log_to_metric nodes.
  anomaly_tolerance: 0.1
  # Anomaly scores will not be calculated for the first 1m after a source is found.
  # Default is 30m.
  # Can be set at node level and/or dimension group level for some log_to_metric nodes.
  anomaly_confidence_period: 1m
  # Skips empty intervals when rolling, so the anomaly scores are calculated based on a history of non-zero intervals.
  # Default is true.
  # Can be set at node level and/or dimension group level for some log_to_metric nodes.
  skip_empty_intervals: false
  # Only report non-zero stats.
  # Default is true.
  # Can be set at node level and/or dimension group level for some log_to_metric nodes.
  only_report_nonzeros: false
  # Anomaly coefficient is used to multiply the final score into the [0, 100] range.
  # The higher the coefficient, the higher the anomaly score will be.
  # Default is 10.
  # Can be set at node level and/or dimension group level for some log_to_metric nodes.
  anomaly_coefficient: 10.0
  # Interval at which item buffers flush their contents.
  # Default is 5s.
  item_buffer_flush_interval: 5s
  # Size limit that triggers an item buffer flush when reached.
  # Default is 1MiB.
  item_buffer_max_byte_limit: 1MiB
  # multiline_max_size defines the multiline buffer size in number of lines. Increase this maximum line count for overflow cases:
  # in overflow cases all buffered lines are dumped as a single line, so for such environments it is better to increase this value.
  # Integer type.
  multiline_max_size: 250
  # multiline_max_byte_size defines the multiline buffer size in bytes. Increase this maximum byte limit for overflow cases:
  # in overflow cases all buffered lines are dumped as a single line, so for such environments it is better to increase this value.
  # datasize.Size type.
  multiline_max_byte_size: "10KB"
  # max_incomplete_line_buffer_size defines the maximum data that can be kept in the buffered line separator. Default value is around 10KB.
  # This is useful when receiving JSON-formatted and large inputs.
  # The line_pattern option can be used to separate inputs into valid JSON objects; when a single line is larger than 10KB this option should be used.
  max_incomplete_line_buffer_size: "10KB"
  # metric_column_opts defines options for metric columns. Currently only column dropping is supported.
  metric_column_opts:
    # drop_columns defines metric columns that will not be sent to metric destinations from the internal pusher.
    # This can be used to reduce high cardinality issues. Supports a prefix match option with * as the terminating character.
    drop_columns:
      - name: docker_id
        # metric_categories is used to define on which metric category the drop operation will be performed (optional).
        # If not given, the column will be dropped from all metric categories.
        # Valid categories: incoming_outgoing, heartbeat, processor, resource
        metric_categories:
          - incoming_outgoing
      - name: labels.*
        # exceptions is used to fine-tune dropped columns (optional).
        # If a column matches any value given in this option, it will not be dropped.
        # Supports a prefix match option with * as the terminating character.
        exceptions:
          - labels.app.value
          - labels.somefield.*
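    # Illustrative example (hypothetical column names): with the rules above, "docker_id" is dropped
    # only from incoming/outgoing metrics, a column such as "labels.team.value" is dropped everywhere
    # because it matches "labels.*", and "labels.app.value" is kept because it matches an exception.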