diff --git a/docs/sources/LogExtendedEventFormat/index.md b/docs/sources/LogExtendedEventFormat/index.md new file mode 100644 index 0000000..282afb1 --- /dev/null +++ b/docs/sources/LogExtendedEventFormat/index.md @@ -0,0 +1,109 @@ +# Vendor - Log Extended Event Format + +## Product - Various products that send LEEF V1 and V2 format messages via syslog + +Each LEEF product should have its own source entry in this documentation set by vendor. In a departure +from normal configuration, all LEEF products should use the "LEEF" version of the unique port and +archive environment variable settings (rather than a unique one per product), as the LEEF log path +handles all products sending events to SC4S in the LEEF format. Examples of this include QRadar itself +as well as other legacy systems. Therefore, the LEEF environment variables for unique port, archive, etc. +should be set only _once_. + +If your deployment has multiple LEEF devices that send to more than one port, +set the LEEF unique port variable(s) as a comma-separated list. See [Unique Listening Ports](https://splunk-connect-for-syslog.readthedocs.io/en/develop/sources/#unique-listening-ports) +for details. + +The source documentation included below is a reference baseline for any product that sends data +using the LEEF log path. + +Some vendors implement LEEF v2.0 format events incorrectly, omitting the required "key=value" separator field +from the LEEF header, thus forcing the consumer to assume the default tab `\t` character. +SC4S will correctly process this omission, but will not correctly process other non-compliant formats. + +The LEEF format allows for the inclusion of a field `devTime` containing the device timestamp and allows the sender to +also specify the format of this timestamp in another field called `devTimeFormat`, which uses the Java Time format. +SC4S uses syslog-ng strptime format which is not directly translatable to the Java Time format. 
Therefore, SC4S has +provided support for the following common formats. If needed, additional time formats can be requested via an issue on +github. + +``` + '%s.%f', + '%s', + '%b %d %H:%M:%S.%f', + '%b %d %H:%M:%S', + '%b %d %Y %H:%M:%S.%f', + '%b %e %Y %H:%M:%S', + '%b %e %H:%M:%S.%f', + '%b %e %H:%M:%S', + '%b %e %Y %H:%M:%S.%f', + '%b %e %Y %H:%M:%S' +``` + + +| Ref | Link | +|----------------|---------------------------------------------------------------------------------------------------------| +| Splunk Add-on LEEF | None | +| Product Manual | https://www.ibm.com/support/knowledgecenter/SS42VS_DSM/com.ibm.dsm.doc/c_LEEF_Format_Guide_intro.html | + + +### Splunk Metadata with LEEF events + +The keys (first column) in `splunk_metadata.csv` for LEEF data sources have a slightly different meaning than those for non-LEEF ones. +The typical `vendor_product` syntax is instead replaced by checks against specific columns of the LEEF event -- namely the first and +second, columns following the leading `LEEF:VERSION` ("column 0"). These specific columns refer to the LEEF `device_vendor`, +and `device_product`, respectively. 
+ +`device_vendor`\_`device_product` + + +Here is a snippet of a sample LANCOPE event in LEEF 2.0 format: +``` +<111>Apr 19 10:29:53 3.3.3.3 LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=192.0.2.0^dst=172.50.123.1^sev=5^cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black +``` +and the corresponding match in `splunk_metadata.csv`: +``` +Lancope_StealthWatch,source,lancope:stealthwatch +``` + +### Default Sourcetype + +| sourcetype | notes | +|----------------|---------------------------------------------------------------------------------------------------------| +| LEEF:1 | Common sourcetype for all LEEF v1 events | +| LEEF:2:`separator` | Common sourcetype for all LEEF v2 events; `separator` is the printable literal or hex value of the separator used in the event | + +### Default Source + +| source | notes | +|----------------|---------------------------------------------------------------------------------------------------------| +| `vendor`:`product` | Varies | + +### Default Index Configuration + +| key | source | index | notes | +|----------------|----------------|----------------|----------------| +| Vendor_Product | Varies | main | none | + +### Filter type + +MSG Parse: This filter parses message content + +### Options + +| Variable | default | description | +|----------------|----------------|----------------| +| SC4S_LISTEN_LEEF_UDP_PORT | empty string | Enable a UDP port for this specific vendor product using a comma-separated list of port numbers | +| SC4S_LISTEN_LEEF_TCP_PORT | empty string | Enable a TCP port for this specific vendor product using a comma-separated list of port numbers | +| SC4S_LISTEN_LEEF_TLS_PORT | empty string | Enable a TLS port for this specific vendor product using a comma-separated list of port numbers | +| SC4S_ARCHIVE_LEEF | no | Enable archive to disk for this specific source | +| SC4S_DEST_LEEF_HEC | no | When Splunk HEC is disabled globally set to yes to enable this specific source | + +### Verification + +An active site will 
# csv-parser definitions for the pipe-delimited LEEF header columns.
# NOTE(review): the log path visible in this file only invokes p_leef_kv;
# these three parsers do not appear to be referenced there -- confirm whether
# they are still needed.

# LEEF 1.0 header: five header columns followed by the event body (MESSAGE).
parser p_leef1_header {
    csv-parser(
        columns("fields.leef_version", "fields.leef_device_vendor", "fields.leef_device_product", "fields.leef_device_version","fields.EventID", MESSAGE)
        delimiters(chars("|"))
        template(t_legacy_hdr_msg)
        flags(strip-whitespace, escape-none, greedy)
    );
};
# LEEF 2.0 header: as above plus a delimiter column (".separator") ahead of
# the event body; drop-invalid rejects input that does not fit the columns.
parser p_leef2_header {
    csv-parser(
        columns("fields.leef_version", "fields.leef_device_vendor", "fields.leef_device_product", "fields.leef_device_version","fields.EventID",".separator", MESSAGE)
        delimiters(chars("|"))
        flags(strip-whitespace, escape-none, greedy, drop-invalid)
    );
};
# LEEF 2.0 header without the delimiter column. This layout is not valid per
# the LEEF 2.0 spec, but some senders emit it anyway.
parser p_leef2alt_header {
    csv-parser(
        columns("fields.leef_version", "fields.leef_device_vendor", "fields.leef_device_product", "fields.leef_device_version","fields.EventID", MESSAGE)
        delimiters(chars("|"))
        flags(strip-whitespace, escape-none, greedy)
    );
};
class leef_kv(object):
    """syslog-ng Python parser for LEEF 1.0 / 2.0 events.

    ``parse`` expects ``MESSAGE`` to hold the pipe-delimited LEEF text starting
    at the version column, for example ``2.0|Vendor|Product|1.0|41|^|k=v^k2=v2``.
    It writes the header columns, the Splunk source and sourcetype, the raw
    event body (``.leef.event``) and one ``.leef.<key>`` value per attribute
    back onto the log message.
    """

    def init(self, options):
        """Prepare the patterns used by ``parse``.

        ``options`` is accepted for the syslog-ng parser interface and is not
        read. Returns True so syslog-ng treats initialisation as successful.
        """
        # Time zone abbreviations removed from devTime before date parsing.
        self.regex = r"( ?(?:[A-Z]{2,4}T|HAEC|IDLW|MSK|NT|UTC|THA))"
        self._devtime_tz = re.compile(self.regex, re.MULTILINE)
        # Delimiter declared as a hex code point: "x5E" or "0x5E".
        self._hex_delimiter = re.compile(r"0?[xX]([0-9A-Fa-f]{1,4})")
        return True

    @staticmethod
    def _split_delimiter_column(remainder):
        """Split the text after the EventID column into (declared, event).

        ``declared`` is the content of the LEEF 2.0 delimiter column, or an
        empty string when the column is empty or missing. A leading segment
        that contains "=" is attribute data whose value happens to hold a
        pipe, not a delimiter column.
        """
        head, pipe, tail = remainder.partition('|')
        if pipe and "=" not in head:
            return head, tail
        return "", remainder

    def _resolve_separator(self, declared):
        """Return (split_character, hex_label) for a declared delimiter.

        An empty declaration falls back to tab. A hex declaration is turned
        into the character it names so the event is split on that character
        and not on the literal text of the declaration.
        """
        if not declared:
            declared = "\t"
        hex_form = self._hex_delimiter.fullmatch(declared)
        if hex_form:
            digits = hex_form.group(1)
            return chr(int(digits, 16)), f"0x{digits.lower()}"
        encoded = binascii.b2a_hex(declared.encode("utf-8")).decode("utf-8").lower()
        return declared, f"0x{encoded}"

    def parse(self, log_message):
        """Unpack one LEEF event onto ``log_message``.

        Always returns True: returning anything else would make syslog-ng
        drop the message. Any failure is recorded as text in
        ``.fields.leef_exception`` instead of being raised.
        """
        try:
            raw = log_message['MESSAGE']
            msg = raw.decode("utf-8") if isinstance(raw, bytes) else raw
            # Split only the five fixed header columns; pipes further right
            # belong to the delimiter column or to attribute values.
            structure = msg.split('|', 5)
            if len(structure) < 6:
                raise ValueError("LEEF header has fewer than five pipe-delimited columns")
            version, vendor, product, product_version, event_id, remainder = structure

            # Indexed fields for Splunk
            log_message['fields.leef_version'] = version
            log_message['fields.leef_vendor'] = vendor
            log_message['fields.leef_product'] = product
            log_message['fields.leef_product_version'] = product_version
            log_message['fields.leef_EventID'] = event_id
            # The log path builds its splunk_metadata lookup key from these
            # two names, so they must be populated here as well.
            log_message['fields.leef_device_vendor'] = vendor
            log_message['fields.leef_device_product'] = product

            if version.startswith("1"):
                # V1 always uses tab and has no delimiter column.
                lv = "1"
                declared, event = "", remainder
            else:
                # V2 should declare the delimiter, but some senders omit it;
                # in that case tab is assumed.
                lv = "2"
                declared, event = self._split_delimiter_column(remainder)
            separator, hex_sep = self._resolve_separator(declared)

            # We just want the event field
            log_message['.leef.event'] = event

            if lv == "1":
                log_message['.splunk.sourcetype'] = f"LEEF:{lv}"
            else:
                log_message['.splunk.sourcetype'] = f"LEEF:{lv}:{hex_sep}"
            # Source is vendor:product (columns 1 and 2 of the header).
            log_message['.splunk.source'] = f"{vendor}:{product}"
            log_message['fields.sc4s_vendor_product'] = f"{vendor}:{product}"

            for pair in event.split(separator):
                key, equals, value = pair.partition("=")
                if not equals or not key:
                    # Not a key=value attribute (e.g. trailing delimiter);
                    # skip it so the remaining attributes are still parsed.
                    continue
                if key == "devTime":
                    value = self._devtime_tz.sub("", value)
                log_message[".leef." + key] = value
        except Exception as e:
            # Deliberately broad: a malformed event must still be forwarded.
            log_message['.fields.leef_exception'] = str(e)

        # return True, other way message is dropped
        return True
| regexp.ReplaceLiteral "[, ]+" ");\n destination(" }}); +{{- end }} + +{{- if (print (getenv "SC4S_DEST_LEEF_ALTERNATES")) }} + {{ getenv "SC4S_DEST_LEEF_ALTERNATES" | regexp.ReplaceLiteral "^" "destination(" | regexp.ReplaceLiteral "[, ]+" ");\n destination(" }}); +{{- end }} + + flags(flow-control,final); +}; diff --git a/tests/test_leef.py b/tests/test_leef.py new file mode 100644 index 0000000..aa914ba --- /dev/null +++ b/tests/test_leef.py @@ -0,0 +1,142 @@ +# Copyright 2019 Splunk, Inc. +# +# Use of this source code is governed by a BSD-2-clause-style +# license that can be found in the LICENSE-BSD2 file or at +# https://opensource.org/licenses/BSD-2-Clause +import random +import pytest +from jinja2 import Environment + +from .sendmessage import * +from .splunkutils import * +from .timeutils import * + +env = Environment() + +# <13>Jan 18 11:07:53 192.168.1.1 LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=192.0.2.0 dst=172.50.123.1 sev=5cat=anomaly srcPort=81 dstPort=21 usrName=joe.black +# Jan 18 11:07:53 myhostname LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=192.0.2.0 dst=172.50.123.1 sev=5 cat=anomaly srcPort=81 dstPort=21 usrName=joe.black +# <13>Jan 18 11:07:53 192.168.1.1 LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black +# Jan 18 11:07:53 myhostname LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black +testdata1 = [ + "{{ mark }}{{ bsd }} {{ host }} LEEF:1.0|Vendor|Product|Version|EventID|src=192.0.2.0\tdst=172.50.123.1\tsev=5\tcat=anomaly\tsrcPort=81\tdstPort=21\tusrName=joe.black", + "{{ bsd }} {{ host }} LEEF:1.0|Vendor|Product|Version|EventID|src=192.0.2.0\tdst=172.50.123.1\tsev=5\tcat=anomaly\tsrcPort=81\tdstPort=21\tusrName=joe.black", + "{{ mark }}1 {{ iso }} {{ host }} 
LEEF:1.0|Vendor|Product|Version|EventID|src=192.0.2.0\tdst=172.50.123.1\tsev=5\tcat=anomaly\tsrcPort=81\tdstPort=21\tusrName=joe.black", + "{{ mark }}1 {{ iso }} {{ host }} LEEF:1.0|Vendor|Product|Version|EventID|src=192.0.2.0\tdst=172.50.123.1\tsev=5\tcat=anomaly\tsrcPort=81\tdstPort=21\tusrName=joe.black", +] +# <13>1 2019-01-18T11:07:53.520Z 192.168.1.1 LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=192.0.2.0 dst=172.50.123.1 sev=5cat=anomaly srcPort=81 dstPort=21 usrName=joe.black +# <133>1 2019-01-18T11:07:53.520+07:00 myhostname LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=192.0.2.0 dst=172.50.123.1 sev=5cat=anomaly srcPort=81 dstPort=21 usrName=joe.black +# <13>1 2019-01-18T11:07:53.520Z 192.168.1.1 LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black +# <133>1 2019-01-18T11:07:53.520+07:00 myhostname LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black +testdata2 = [ + "{{ mark }}{{ bsd }} {{ host }} LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5^cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black", + "{{ bsd }} {{ host }} LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5^cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black", + "{{ mark }}1 {{ iso }} {{ host }} LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5^cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black", + "{{ mark }}1 {{ iso }} {{ host }} LEEF:2.0|Vendor|Product|Version|EventID|^|src=192.0.2.0^dst=172.50.123.1^sev=5^cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black", + "{{ mark }}1 {{ iso }} {{ host }} LEEF:2.0|Vendor|Product|Version|EventID|0x5E|src=192.0.2.0^dst=172.50.123.1^sev=5^cat=anomaly^srcPort=81^dstPort=21^usrName=joe.black", + "{{ mark }}1 {{ iso }} {{ host }} 
@pytest.mark.parametrize("event", testdata1)
def test_leef1_generic(
    record_property, setup_wordlist, setup_splunk, setup_sc4s, event
):
    """Send one LEEF v1 sample and expect exactly one LEEF:1 event in Splunk."""
    host = f"{random.choice(setup_wordlist)}-{random.choice(setup_wordlist)}"

    now = datetime.datetime.now()
    iso, bsd, _time, _date, _tzoffset, _tzname, epoch = time_operations(now)

    # Tune time functions
    iso = iso[0:19] + iso[26:32]
    epoch = epoch[:-7]

    message = env.from_string(event + "\n").render(
        mark="<111>", bsd=bsd, host=host, iso=iso
    )

    sc4s_address = setup_sc4s[0]
    sc4s_port = setup_sc4s[1][514]
    sendsingle(message, sc4s_address, sc4s_port)

    search_template = env.from_string(
        'search _time={{ epoch }} index=main host="{{ host }}" sourcetype="LEEF:1"'
    )
    search = search_template.render(epoch=epoch, host=host)

    result_count, _event_count = splunk_single(setup_splunk, search)

    for label, value in (
        ("host", host),
        ("resultCount", result_count),
        ("message", message),
    ):
        record_property(label, value)

    assert result_count == 1


@pytest.mark.parametrize("event", testdata2)
def test_leef2_generic(
    record_property, setup_wordlist, setup_splunk, setup_sc4s, event
):
    """Send one LEEF v2 sample and expect exactly one LEEF:2:* event in Splunk."""
    host = f"{random.choice(setup_wordlist)}-{random.choice(setup_wordlist)}"

    now = datetime.datetime.now()
    iso, bsd, _time, _date, _tzoffset, _tzname, epoch = time_operations(now)

    # Tune time functions
    iso = iso[0:19] + iso[26:32]
    epoch = epoch[:-7]

    message = env.from_string(event + "\n").render(
        mark="<111>", bsd=bsd, host=host, iso=iso
    )

    sc4s_address = setup_sc4s[0]
    sc4s_port = setup_sc4s[1][514]
    sendsingle(message, sc4s_address, sc4s_port)

    search_template = env.from_string(
        'search _time={{ epoch }} index=main host="{{ host }}" sourcetype="LEEF:2:*"'
    )
    search = search_template.render(epoch=epoch, host=host)

    result_count, _event_count = splunk_single(setup_splunk, search)

    for label, value in (
        ("host", host),
        ("resultCount", result_count),
        ("message", message),
    ):
        record_property(label, value)

    assert result_count == 1


@pytest.mark.parametrize("event", testdata3)
def test_leef_devtime(record_property, setup_wordlist, setup_splunk, setup_sc4s, event):
    """Send one sample carrying devTime and expect one LEEF event at that time."""
    host = f"{random.choice(setup_wordlist)}-{random.choice(setup_wordlist)}"

    now = datetime.datetime.now()
    iso, bsd, _time, _date, _tzoffset, _tzname, epoch = time_operations(now)

    # Tune time functions
    iso = iso[0:19] + iso[26:32]
    epoch = epoch[:-7]

    # Unlike the generic tests, the samples here render the epoch into devTime.
    message = env.from_string(event + "\n").render(
        mark="<111>", bsd=bsd, host=host, iso=iso, epoch=epoch
    )

    sc4s_address = setup_sc4s[0]
    sc4s_port = setup_sc4s[1][514]
    sendsingle(message, sc4s_address, sc4s_port)

    search_template = env.from_string(
        'search _time={{ epoch }} index=main host="{{ host }}" sourcetype="LEEF:*"'
    )
    search = search_template.render(epoch=epoch, host=host)

    result_count, _event_count = splunk_single(setup_splunk, search)

    for label, value in (
        ("host", host),
        ("resultCount", result_count),
        ("message", message),
    ):
        record_property(label, value)

    assert result_count == 1