From bdda8691ffae944b6fd9f148f0e2ea1602352d65 Mon Sep 17 00:00:00 2001 From: blotus Date: Wed, 8 Jun 2022 15:16:58 +0200 Subject: [PATCH] New syslog parser for syslog datasource (#1554) --- .../syslog/internal/parser/rfc3164/parse.go | 255 +++++++++++ .../internal/parser/rfc3164/parse_test.go | 370 ++++++++++++++++ .../internal/parser/rfc3164/perf_test.go | 62 +++ .../syslog/internal/parser/rfc5424/parse.go | 398 ++++++++++++++++++ .../internal/parser/rfc5424/parse_test.go | 269 ++++++++++++ .../internal/parser/rfc5424/perf_test.go | 103 +++++ .../syslog/internal/parser/utils/utils.go | 76 ++++ .../internal/{ => server}/syslogserver.go | 0 pkg/acquisition/modules/syslog/syslog.go | 88 ++-- pkg/acquisition/modules/syslog/syslog_test.go | 86 ++-- 10 files changed, 1615 insertions(+), 92 deletions(-) create mode 100644 pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse.go create mode 100644 pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse_test.go create mode 100644 pkg/acquisition/modules/syslog/internal/parser/rfc3164/perf_test.go create mode 100644 pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse.go create mode 100644 pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse_test.go create mode 100644 pkg/acquisition/modules/syslog/internal/parser/rfc5424/perf_test.go create mode 100644 pkg/acquisition/modules/syslog/internal/parser/utils/utils.go rename pkg/acquisition/modules/syslog/internal/{ => server}/syslogserver.go (100%) diff --git a/pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse.go b/pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse.go new file mode 100644 index 000000000..3b59a806b --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse.go @@ -0,0 +1,255 @@ +package rfc3164 + +import ( + "fmt" + "time" + + "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/utils" +) + +type RFC3164Option func(*RFC3164) + +type RFC3164 struct { + PRI int + Timestamp time.Time + Hostname string + Tag string + Message string + PID string + // + len int + position int + buf []byte + useCurrentYear bool //If no year is specified in the timestamp, use the current year + strictHostname bool //If the hostname contains invalid characters or is not an IP, return an error +} + +const PRI_MAX_LEN = 3 + +//Order is important: format with the most information must be first because we will stop on the first match +var VALID_TIMESTAMPS = []string{ + time.RFC3339, + "Jan 02 15:04:05 2006", + "Jan _2 15:04:05 2006", + "Jan 02 15:04:05", + "Jan _2 15:04:05", +} + +func WithCurrentYear() RFC3164Option { + return func(r *RFC3164) { + r.useCurrentYear = true + } +} + +func WithStrictHostname() RFC3164Option { + return func(r *RFC3164) { + r.strictHostname = true + } +} + +func (r *RFC3164) parsePRI() error { + + pri := 0 + + if r.buf[r.position] != '<' { + return fmt.Errorf("PRI must start with '<'") + } + + r.position++ + + for r.position < r.len { + c := r.buf[r.position] + if c == '>' { + r.position++ + break + } + if c < '0' || c > '9' { + return fmt.Errorf("PRI must be a number") + } + pri = pri*10 + int(c-'0') + r.position++ + } + + if pri > 999 { + return fmt.Errorf("PRI must be up to 3 characters long") + } + + if r.position == r.len && r.buf[r.position-1] != '>' { + return fmt.Errorf("PRI must end with '>'") + } + + r.PRI = pri + return nil +} + +func (r *RFC3164) parseTimestamp() error { + validTs := false + for _, layout := range VALID_TIMESTAMPS { + tsLen := len(layout) + if r.position+tsLen > r.len { + continue + } + t, err := time.Parse(layout, string(r.buf[r.position:r.position+tsLen])) + if err == nil { + validTs = true + r.Timestamp = t + r.position += tsLen + break + } + } + if !validTs { + return fmt.Errorf("timestamp is not valid") + } + if r.useCurrentYear { + if r.Timestamp.Year() == 0 { + r.Timestamp = time.Date(time.Now().Year(), r.Timestamp.Month(), r.Timestamp.Day(), r.Timestamp.Hour(), r.Timestamp.Minute(), r.Timestamp.Second(), r.Timestamp.Nanosecond(), r.Timestamp.Location()) + } + } + r.position++ + return nil +} + +func (r *RFC3164) parseHostname() error { + hostname := []byte{} + for r.position < r.len { + c := r.buf[r.position] + if c == ' ' { + r.position++ + break + } + hostname = append(hostname, c) + r.position++ + } + if r.strictHostname { + if !utils.IsValidHostnameOrIP(string(hostname)) { + return fmt.Errorf("hostname is not valid") + } + } + if len(hostname) == 0 { + return fmt.Errorf("hostname is empty") + } + r.Hostname = string(hostname) + return nil +} + +//We do not enforce tag len as quite a lot of syslog client send tags with more than 32 chars +func (r *RFC3164) parseTag() error { + tag := []byte{} + tmpPid := []byte{} + pidEnd := false + hasPid := false + for r.position < r.len { + c := r.buf[r.position] + if !utils.IsAlphaNumeric(c) { + break + } + tag = append(tag, c) + r.position++ + } + if len(tag) == 0 { + return fmt.Errorf("tag is empty") + } + r.Tag = string(tag) + + if r.position == r.len { + return nil + } + + c := r.buf[r.position] + if c == '[' { + hasPid = true + r.position++ + for r.position < r.len { + c = r.buf[r.position] + if c == ']' { + pidEnd = true + r.position++ + break + } + if c < '0' || c > '9' { + return fmt.Errorf("pid inside tag must be a number") + } + tmpPid = append(tmpPid, c) + r.position++ + } + } + + if hasPid && !pidEnd { + return fmt.Errorf("pid inside tag must be closed with ']'") + } + + if hasPid { + r.PID = string(tmpPid) + } + return nil +} + +func (r *RFC3164) parseMessage() error { + err := r.parseTag() + if err != nil { + return err + } + + if r.position == r.len { + return fmt.Errorf("message is empty") + } + + c := r.buf[r.position] + + if c == ':' { + r.position++ + } + + for { + if r.position >= r.len { + return fmt.Errorf("message is empty") + } + c := r.buf[r.position] + if c != ' ' { + break + } + r.position++ + } + + message := r.buf[r.position:r.len] + r.Message = string(message) + return nil +} + +func (r *RFC3164) Parse(message []byte) error { + r.len = len(message) + if r.len == 0 { + return fmt.Errorf("message is empty") + } + r.buf = message + + err := r.parsePRI() + if err != nil { + return err + } + + err = r.parseTimestamp() + if err != nil { + return err + } + + err = r.parseHostname() + if err != nil { + return err + } + + err = r.parseMessage() + if err != nil { + return err + } + + return nil +} + +func NewRFC3164Parser(opts ...RFC3164Option) *RFC3164 { + r := &RFC3164{} + for _, opt := range opts { + opt(r) + } + return r +} diff --git a/pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse_test.go b/pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse_test.go new file mode 100644 index 000000000..f0dff4c05 --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/rfc3164/parse_test.go @@ -0,0 +1,370 @@ +package rfc3164 + +import ( + "testing" + "time" +) + +func TestPri(t *testing.T) { + tests := []struct { + input string + expected int + expectedErr string + }{ + {"<0>", 0, ""}, + {"<19>", 19, ""}, + {"<200>", 200, ""}, + {"<4999>", 0, "PRI must be up to 3 characters long"}, + {"<123", 0, "PRI must end with '>'"}, + {"123>", 0, "PRI must start with '<'"}, + {"", 0, "PRI must be a number"}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + r := &RFC3164{} + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parsePRI() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.PRI != test.expected { + t.Errorf("expected %d, got %d", test.expected, r.PRI) + } + } + } + }) + } +} + +func TestTimestamp(t *testing.T) { + tests := []struct { + input string + expected string + expectedErr string + currentYear bool + }{ + {"May 20 09:33:54", "0000-05-20T09:33:54Z", "", false}, + {"May 20 09:33:54", "2022-05-20T09:33:54Z", "", true}, + {"May 20 09:33:54 2022", "2022-05-20T09:33:54Z", "", false}, + {"May 1 09:33:54 2022", "2022-05-01T09:33:54Z", "", false}, + {"May 01 09:33:54 2021", "2021-05-01T09:33:54Z", "", true}, + {"foobar", "", "timestamp is not valid", false}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + opts := []RFC3164Option{} + if test.currentYear { + opts = append(opts, WithCurrentYear()) + } + r := NewRFC3164Parser(opts...) + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parseTimestamp() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.Timestamp.Format(time.RFC3339) != test.expected { + t.Errorf("expected %s, got %s", test.expected, r.Timestamp.Format(time.RFC3339)) + } + } + } + }) + } +} + +func TestHostname(t *testing.T) { + tests := []struct { + input string + expected string + expectedErr string + strictHostname bool + }{ + {"127.0.0.1", "127.0.0.1", "", false}, + {"::1", "::1", "", false}, + {"foo.-bar", "", "hostname is not valid", true}, + {"foo-.bar", "", "hostname is not valid", true}, + {"foo123.bar", "foo123.bar", "", true}, + {"a..", "", "hostname is not valid", true}, + {"foo.bar", "foo.bar", "", false}, + {"foo,bar", "foo,bar", "", false}, + {"foo,bar", "", "hostname is not valid", true}, + {"", "", "hostname is empty", false}, + {".", ".", "", true}, + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "", "hostname is not valid", true}, + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", "hostname is not valid", true}, + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", false}, + {"a.foo-", "", "hostname is not valid", true}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + opts := []RFC3164Option{} + if test.strictHostname { + opts = append(opts, WithStrictHostname()) + } + r := NewRFC3164Parser(opts...) + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parseHostname() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.Hostname != test.expected { + t.Errorf("expected %s, got %s", test.expected, r.Hostname) + } + } + } + }) + } +} + +func TestTag(t *testing.T) { + tests := []struct { + input string + expected string + expectedPID string + expectedErr string + }{ + {"foobar", "foobar", "", ""}, + {"foobar[42]", "foobar", "42", ""}, + {"", "", "", "tag is empty"}, + {"foobar[", "", "", "pid inside tag must be closed with ']'"}, + {"foobar[42", "", "", "pid inside tag must be closed with ']'"}, + {"foobar[asd]", "foobar", "", "pid inside tag must be a number"}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + r := &RFC3164{} + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parseTag() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.Tag != test.expected { + t.Errorf("expected %s, got %s", test.expected, r.Tag) + } + if r.PID != test.expectedPID { + t.Errorf("expected %s, got %s", test.expected, r.Message) + } + } + } + }) + } +} + +func TestMessage(t *testing.T) { + tests := []struct { + input string + expected string + expectedErr string + }{ + {"foobar: pouet", "pouet", ""}, + {"foobar[42]: test", "test", ""}, + {"foobar[123]: this is a test", "this is a test", ""}, + {"foobar[123]: ", "", "message is empty"}, + {"foobar[123]:", "", "message is empty"}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + r := &RFC3164{} + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parseMessage() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.Message != test.expected { + t.Errorf("expected message %s, got %s", test.expected, r.Tag) + } + } + } + }) + } +} + +func TestParse(t *testing.T) { + type expected struct { + Timestamp time.Time + Hostname string + Tag string + PID string + Message string + PRI int + } + tests := []struct { + input string + expected expected + expectedErr string + opts []RFC3164Option + }{ + { + "<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo", expected{ + Timestamp: time.Date(0, time.May, 20, 9, 33, 54, 0, time.UTC), + Hostname: "UDMPRO,a2edd0c6ae48,udm-1.10.0.3686", + Tag: "kernel", + PID: "", + Message: "foo", + PRI: 12, + }, "", []RFC3164Option{}, + }, + { + "<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo", expected{ + Timestamp: time.Date(2022, time.May, 20, 9, 33, 54, 0, time.UTC), + Hostname: "UDMPRO,a2edd0c6ae48,udm-1.10.0.3686", + Tag: "kernel", + PID: "", + Message: "foo", + PRI: 12, + }, "", []RFC3164Option{WithCurrentYear()}, + }, + { + "<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo", expected{}, "hostname is not valid", []RFC3164Option{WithStrictHostname()}, + }, + { + "foobar", expected{}, "PRI must start with '<'", []RFC3164Option{}, + }, + { + "<12>", expected{}, "timestamp is not valid", []RFC3164Option{}, + }, + { + "<12 May 02 09:33:54 foo.bar", expected{}, "PRI must be a number", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54", expected{}, "hostname is empty", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54 foo.bar", expected{}, "tag is empty", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54 foo.bar bla[42", expected{}, "pid inside tag must be closed with ']'", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54 foo.bar bla[42]", expected{}, "message is empty", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54 foo.bar bla[42]: ", expected{}, "message is empty", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54 foo.bar bla", expected{}, "message is empty", []RFC3164Option{}, + }, + { + "<12>May 02 09:33:54 foo.bar bla:", expected{}, "message is empty", []RFC3164Option{}, + }, + { + "", expected{}, "message is empty", []RFC3164Option{}, + }, + { + `<13>1 2021-05-18T11:58:40.828081+02:00 mantis sshd 49340 - [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{}, "timestamp is not valid", []RFC3164Option{}, + }, + { + `<46>Jun 2 06:55:39 localhost haproxy[27213]: Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`, expected{ + Timestamp: time.Date(time.Now().Year(), time.June, 2, 6, 55, 39, 0, time.UTC), + Hostname: "localhost", + Tag: "haproxy", + PID: "27213", + Message: `Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`, + PRI: 46, + }, "", []RFC3164Option{WithCurrentYear()}, + }, + { + `<46>Jun 2 06:55:39 2022 localhost haproxy[27213]: Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`, expected{ + Timestamp: time.Date(2022, time.June, 2, 6, 55, 39, 0, time.UTC), + Hostname: "localhost", + Tag: "haproxy", + PID: "27213", + Message: `Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`, + PRI: 46, + }, "", []RFC3164Option{}, + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + r := NewRFC3164Parser(test.opts...) + err := r.Parse([]byte(test.input)) + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error '%s', got '%s'", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: '%s'", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error '%s', got no error", test.expectedErr) + } else { + if r.Timestamp != test.expected.Timestamp { + t.Errorf("expected timestamp '%s', got '%s'", test.expected.Timestamp, r.Timestamp) + } + if r.Hostname != test.expected.Hostname { + t.Errorf("expected hostname '%s', got '%s'", test.expected.Hostname, r.Hostname) + } + if r.Tag != test.expected.Tag { + t.Errorf("expected tag '%s', got '%s'", test.expected.Tag, r.Tag) + } + if r.PID != test.expected.PID { + t.Errorf("expected pid '%s', got '%s'", test.expected.PID, r.PID) + } + if r.Message != test.expected.Message { + t.Errorf("expected message '%s', got '%s'", test.expected.Message, r.Message) + } + if r.PRI != test.expected.PRI { + t.Errorf("expected pri '%d', got '%d'", test.expected.PRI, r.PRI) + } + } + } + }) + } +} diff --git a/pkg/acquisition/modules/syslog/internal/parser/rfc3164/perf_test.go b/pkg/acquisition/modules/syslog/internal/parser/rfc3164/perf_test.go new file mode 100644 index 000000000..3805090f5 --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/rfc3164/perf_test.go @@ -0,0 +1,62 @@ +package rfc3164 + +import "testing" + +var e error + +func BenchmarkParse(b *testing.B) { + tests := []struct { + input []byte + opts []RFC3164Option + }{ + { + []byte("<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: [1670546.400229] IN=eth9 OUT= MAC=24:5a:4c:7b:0a:4c:34:27:92:67:0f:2b:08:00 SRC=79.124.62.34 DST=x.x.x.x LEN=44 TOS=0x00 PREC=0x00 TTL=243 ID=37520 PROTO=TCP SPT=55055 DPT=51443 WINDOW=1024 RES=0x00 SYN URGP=0"), []RFC3164Option{}, + }, + { + []byte("<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo"), []RFC3164Option{WithCurrentYear()}, + }, + { + []byte("<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo"), []RFC3164Option{WithStrictHostname()}, + }, + { + []byte("foobar"), []RFC3164Option{}, + }, + { + []byte("<12>"), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54"), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54 foo.bar"), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54 foo.bar bla[42"), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54 foo.bar bla[42]"), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54 foo.bar bla[42]: "), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54 foo.bar bla"), []RFC3164Option{}, + }, + { + []byte("<12>May 02 09:33:54 foo.bar bla:"), []RFC3164Option{}, + }, + { + []byte(""), []RFC3164Option{}, + }, + } + var err error + for _, test := range tests { + b.Run(string(test.input), func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := NewRFC3164Parser(test.opts...) + err = r.Parse(test.input) + } + }) + } + e = err +} diff --git a/pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse.go b/pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse.go new file mode 100644 index 000000000..8b71a77e2 --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse.go @@ -0,0 +1,398 @@ +package rfc5424 + +import ( + "fmt" + "time" + + "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/utils" +) + +type RFC5424Option func(*RFC5424) + +type RFC5424 struct { + PRI int + Timestamp time.Time + Hostname string + Tag string + Message string + PID string + MsgID string + // + len int + position int + buf []byte + useCurrentYear bool //If no year is specified in the timestamp, use the current year + strictHostname bool //If the hostname contains invalid characters or is not an IP, return an error +} + +const PRI_MAX_LEN = 3 + +const NIL_VALUE = '-' + +var VALID_TIMESTAMPS = []string{ + time.RFC3339, +} + +const VALID_TIMESTAMP = time.RFC3339Nano + +func WithCurrentYear() RFC5424Option { + return func(r *RFC5424) { + r.useCurrentYear = true + } +} + +func WithStrictHostname() RFC5424Option { + return func(r *RFC5424) { + r.strictHostname = true + } +} + +func (r *RFC5424) parsePRI() error { + + pri := 0 + + if r.buf[r.position] != '<' { + return fmt.Errorf("PRI must start with '<'") + } + + r.position++ + + for r.position < r.len { + c := r.buf[r.position] + if c == '>' { + r.position++ + break + } + if c < '0' || c > '9' { + return fmt.Errorf("PRI must be a number") + } + pri = pri*10 + int(c-'0') + r.position++ + } + + if pri > 999 { + return fmt.Errorf("PRI must be up to 3 characters long") + } + + if r.position == r.len && r.buf[r.position-1] != '>' { + return fmt.Errorf("PRI must end with '>'") + } + + r.PRI = pri + return nil +} + +func (r *RFC5424) parseVersion() error { + if r.buf[r.position] != '1' { + return fmt.Errorf("version must be 1") + } + r.position += 2 + if r.position >= r.len { + return fmt.Errorf("version must be followed by a space") + } + return nil +} + +func (r *RFC5424) parseTimestamp() error { + + timestamp := []byte{} + + if r.buf[r.position] == NIL_VALUE { + r.Timestamp = time.Now().UTC().Round(0) + r.position += 2 + return nil + } + + for r.position < r.len { + c := r.buf[r.position] + if c == ' ' { + break + } + timestamp = append(timestamp, c) + r.position++ + } + + if len(timestamp) == 0 { + return fmt.Errorf("timestamp is empty") + } + + if r.position == r.len { + return fmt.Errorf("EOL after timestamp") + } + + date, err := time.Parse(VALID_TIMESTAMP, string(timestamp)) + + if err != nil { + return fmt.Errorf("timestamp is not valid") + } + + r.Timestamp = date + + r.position++ + + if r.position >= r.len { + return fmt.Errorf("EOL after timestamp") + } + + return nil +} + +func (r *RFC5424) parseHostname() error { + if r.buf[r.position] == NIL_VALUE { + r.Hostname = "" + r.position += 2 + return nil + } + + hostname := []byte{} + for r.position < r.len { + c := r.buf[r.position] + if c == ' ' { + r.position++ + break + } + hostname = append(hostname, c) + r.position++ + } + if r.strictHostname { + if !utils.IsValidHostnameOrIP(string(hostname)) { + return fmt.Errorf("hostname is not valid") + } + } + if len(hostname) == 0 { + return fmt.Errorf("hostname is empty") + } + r.Hostname = string(hostname) + return nil +} + +func (r *RFC5424) parseAppName() error { + if r.buf[r.position] == NIL_VALUE { + r.Tag = "" + r.position += 2 + return nil + } + + appname := []byte{} + for r.position < r.len { + c := r.buf[r.position] + if c == ' ' { + r.position++ + break + } + appname = append(appname, c) + r.position++ + } + + if len(appname) == 0 { + return fmt.Errorf("appname is empty") + } + + if len(appname) > 48 { + return fmt.Errorf("appname is too long") + } + + r.Tag = string(appname) + return nil +} + +func (r *RFC5424) parseProcID() error { + if r.buf[r.position] == NIL_VALUE { + r.PID = "" + r.position += 2 + return nil + } + + procid := []byte{} + for r.position < r.len { + c := r.buf[r.position] + if c == ' ' { + r.position++ + break + } + procid = append(procid, c) + r.position++ + } + + if len(procid) == 0 { + return fmt.Errorf("procid is empty") + } + + if len(procid) > 128 { + return fmt.Errorf("procid is too long") + } + + r.PID = string(procid) + return nil +} + +func (r *RFC5424) parseMsgID() error { + if r.buf[r.position] == NIL_VALUE { + r.MsgID = "" + r.position += 2 + return nil + } + + msgid := []byte{} + for r.position < r.len { + c := r.buf[r.position] + if c == ' ' { + r.position++ + break + } + msgid = append(msgid, c) + r.position++ + } + + if len(msgid) == 0 { + return fmt.Errorf("msgid is empty") + } + + if len(msgid) > 32 { + return fmt.Errorf("msgid is too long") + } + + r.MsgID = string(msgid) + return nil +} + +func (r *RFC5424) parseStructuredData() error { + done := false + if r.buf[r.position] == NIL_VALUE { + r.position += 2 + return nil + } + if r.buf[r.position] != '[' { + return fmt.Errorf("structured data must start with '[' or be '-'") + } + prev := byte(0) + for r.position < r.len { + done = false + c := r.buf[r.position] + if c == ']' && prev != '\\' { + done = true + r.position++ + if r.position < r.len && r.buf[r.position] == ' ' { + break + } + } + prev = c + r.position++ + } + r.position++ + if !done { + return fmt.Errorf("structured data must end with ']'") + } + return nil +} + +func (r *RFC5424) parseMessage() error { + if r.position == r.len { + return fmt.Errorf("message is empty") + } + + message := []byte{} + + for r.position < r.len { + c := r.buf[r.position] + message = append(message, c) + r.position++ + } + r.Message = string(message) + return nil +} + +func (r *RFC5424) Parse(message []byte) error { + r.len = len(message) + if r.len == 0 { + return fmt.Errorf("syslog line is empty") + } + r.buf = message + + err := r.parsePRI() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after PRI") + } + + err = r.parseVersion() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after Version") + } + + err = r.parseTimestamp() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after Timestamp") + } + + err = r.parseHostname() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after hostname") + } + + err = r.parseAppName() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after appname") + } + + err = r.parseProcID() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after ProcID") + } + + err = r.parseMsgID() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after MSGID") + } + + err = r.parseStructuredData() + if err != nil { + return err + } + + if r.position >= r.len { + return fmt.Errorf("EOL after SD") + } + + err = r.parseMessage() + if err != nil { + return err + } + + return nil +} + +func NewRFC5424Parser(opts ...RFC5424Option) *RFC5424 { + r := &RFC5424{} + for _, opt := range opts { + opt(r) + } + return r +} diff --git a/pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse_test.go b/pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse_test.go new file mode 100644 index 000000000..23dc8e53a --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/rfc5424/parse_test.go @@ -0,0 +1,269 @@ +package rfc5424 + +import ( + "testing" + "time" +) + +func TestPri(t *testing.T) { + tests := []struct { + input string + expected int + expectedErr string + }{ + {"<0>", 0, ""}, + {"<19>", 19, ""}, + {"<200>", 200, ""}, + {"<4999>", 0, "PRI must be up to 3 characters long"}, + {"<123", 0, "PRI must end with '>'"}, + {"123>", 0, "PRI must start with '<'"}, + {"", 0, "PRI must be a number"}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + r := &RFC5424{} + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parsePRI() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.PRI != test.expected { + t.Errorf("expected %d, got %d", test.expected, r.PRI) + } + } + } + }) + } +} + +func TestHostname(t *testing.T) { + tests := []struct { + input string + expected string + expectedErr string + strictHostname bool + }{ + {"127.0.0.1", "127.0.0.1", "", false}, + {"::1", "::1", "", false}, + {"-", "", "", false}, + {"foo.-bar", "", "hostname is not valid", true}, + {"foo-.bar", "", "hostname is not valid", true}, + {"foo123.bar", "foo123.bar", "", true}, + {"a..", "", "hostname is not valid", true}, + {"foo.bar", "foo.bar", "", false}, + {"foo,bar", "foo,bar", "", false}, + {"foo,bar", "", "hostname is not valid", true}, + {".", ".", "", true}, + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "", "hostname is not valid", true}, + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", "hostname is not valid", true}, + {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", false}, + {"a.foo-", "", "hostname is not valid", true}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + opts := []RFC5424Option{} + if test.strictHostname { + opts = append(opts, WithStrictHostname()) + } + r := NewRFC5424Parser(opts...) + r.buf = []byte(test.input) + r.len = len(r.buf) + err := r.parseHostname() + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error %s, got %s", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: %s", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error %s, got no error", test.expectedErr) + } else { + if r.Hostname != test.expected { + t.Errorf("expected %s, got %s", test.expected, r.Hostname) + } + } + } + }) + } +} + +func TestParse(t *testing.T) { + type expected struct { + Timestamp time.Time + Hostname string + Tag string + PID string + Message string + PRI int + MsgID string + } + + tests := []struct { + name string + input string + expected expected + expectedErr string + opts []RFC5424Option + }{ + { + "valid msg", + `<13>1 2021-05-18T11:58:40.828081+02:42 mantis sshd 49340 - [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{ + Timestamp: time.Date(2021, 5, 18, 11, 58, 40, 828081000, time.FixedZone("+0242", 9720)), + Hostname: "mantis", + Tag: "sshd", + PID: "49340", + MsgID: "", + Message: "blabla", + PRI: 13, + }, "", []RFC5424Option{}, + }, + { + "valid msg with msgid", + `<13>1 2021-05-18T11:58:40.828081+02:42 mantis foobar 49340 123123 [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{ + Timestamp: time.Date(2021, 5, 18, 11, 58, 40, 828081000, time.FixedZone("+0242", 9720)), + Hostname: "mantis", + Tag: "foobar", + PID: "49340", + MsgID: "123123", + Message: "blabla", + PRI: 13, + }, "", []RFC5424Option{}, + }, + { + "valid msg with repeating SD", + `<13>1 2021-05-18T11:58:40.828081+02:42 mantis foobar 49340 123123 [timeQuality isSynced="0" tzKnown="1"][foo="bar][a] blabla`, expected{ + Timestamp: time.Date(2021, 5, 18, 11, 58, 40, 828081000, time.FixedZone("+0242", 9720)), + Hostname: "mantis", + Tag: "foobar", + PID: "49340", + MsgID: "123123", + Message: "blabla", + PRI: 13, + }, "", []RFC5424Option{}, + }, + { + "invalid SD", + `<13>1 2021-05-18T11:58:40.828081+02:00 mantis foobar 49340 123123 [timeQuality asd`, expected{}, "structured data must end with ']'", []RFC5424Option{}, + }, + { + "invalid version", + `<13>42 2021-05-18T11:58:40.828081+02:00 mantis foobar 49340 123123 [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{}, "version must be 1", []RFC5424Option{}, + }, + { + "invalid message", + `<13>1`, expected{}, "version must be followed by a space", []RFC5424Option{}, + }, + { + "valid msg with empty fields", + `<13>1 - foo - - - - blabla`, expected{ + Timestamp: time.Now().UTC().Round(0), + Hostname: "foo", + PRI: 13, + Message: "blabla", + }, "", []RFC5424Option{}, + }, + { + "valid msg with empty fields", + `<13>1 - - - - - - blabla`, expected{ + Timestamp: time.Now().UTC().Round(0), + PRI: 13, + Message: "blabla", + }, "", []RFC5424Option{}, + }, + { + "valid msg with escaped SD", + `<13>1 2022-05-24T10:57:39Z testhostname unknown - sn="msgid" [foo="\]" bar="a\""][a b="[\]" c] testmessage`, + expected{ + PRI: 13, + Timestamp: time.Date(2022, 5, 24, 10, 57, 39, 0, time.UTC), + Tag: "unknown", + Hostname: "testhostname", + MsgID: `sn="msgid"`, + Message: `testmessage`, + }, "", []RFC5424Option{}, + }, + { + "valid complex msg", + `<13>1 2022-05-24T10:57:39Z myhostname unknown - sn="msgid" [all@0 request="/dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js" src_ip_geo_country="DE" MONTH="May" COMMONAPACHELOG="1.1.1.1 - - [24/May/2022:10:57:37 +0200\] \"GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0\" 304 0" auth="-" HOUR="10" gl2_remote_ip="172.31.32.142" ident="-" gl2_remote_port="43375" BASE10NUM="[2.0, 304, 0\]" pid="-1" program="nginx" gl2_source_input="623ed3440183476d61cff974" INT="+0200" is_private_ip="false" YEAR="2022" src_ip_geo_city="Achern" clientip="1.1.1.1" USERNAME="-" src_ip_geo_location="48.6306,8.0743" gl2_source_node="8620c2bb-dbb7-4535-b1ce-83df223acd8d" MINUTE="57" timestamp="2022-05-24T08:57:37.000Z" src_ip_asn="3320" level="5" IP="1.1.1.1" IPV4="1.1.1.1" verb="GET" gl2_message_id="01G3TMJFAMFS4H60QSF7M029R0" TIME="10:57:37" USER="-" src_ip_asn_owner="Deutsche Telekom AG" response="304" bytes="0" SECOND="37" httpversion="2.0" _id="906ce155-db3f-11ec-b25f-0a189ba2c64e" facility="user" MONTHDAY="24"] source: sn="www.foobar.com" | message: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 "https://www.foobar.com/sw.js" "Mozilla/5.0 (Linux; Android 9; ANE-LX1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.61 Mobile Safari/537.36" "-" "www.foobar.com" sn="www.foobar.com" rt=0.000 ua="-" us="-" ut="-" ul="-" cs=HIT { request: /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js | src_ip_geo_country: DE | MONTH: May | COMMONAPACHELOG: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 | auth: - | HOUR: 10 | gl2_remote_ip: 172.31.32.142 | ident: - | gl2_remote_port: 43375 | BASE10NUM: [2.0, 304, 0] | pid: -1 | program: nginx | gl2_source_input: 623ed3440183476d61cff974 | INT: +0200 | is_private_ip: false | YEAR: 2022 | src_ip_geo_city: Achern | clientip: 1.1.1.1 | USERNAME:`, + expected{ + Timestamp: time.Date(2022, 5, 24, 10, 57, 39, 0, time.UTC), + Hostname: "myhostname", + Tag: "unknown", + PRI: 13, + MsgID: `sn="msgid"`, + Message: `source: sn="www.foobar.com" | message: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 "https://www.foobar.com/sw.js" "Mozilla/5.0 (Linux; Android 9; ANE-LX1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.61 Mobile Safari/537.36" "-" "www.foobar.com" sn="www.foobar.com" rt=0.000 ua="-" us="-" ut="-" ul="-" cs=HIT { request: /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js | src_ip_geo_country: DE | MONTH: May | COMMONAPACHELOG: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 | auth: - | HOUR: 10 | gl2_remote_ip: 172.31.32.142 | ident: - | gl2_remote_port: 43375 | BASE10NUM: [2.0, 304, 0] | pid: -1 | program: nginx | gl2_source_input: 623ed3440183476d61cff974 | INT: +0200 | is_private_ip: false | YEAR: 2022 | src_ip_geo_city: Achern | clientip: 1.1.1.1 | USERNAME:`, + }, "", []RFC5424Option{}, + }, + { + "partial message", + `<13>1 2022-05-24T10:57:39Z foo bar -`, + expected{}, + "EOL after ProcID", + []RFC5424Option{}, + }, + { + "partial message", + `<13>1 2022-05-24T10:57:39Z foo bar `, + expected{}, + "EOL after appname", + []RFC5424Option{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + r := NewRFC5424Parser(test.opts...) + err := r.Parse([]byte(test.input)) + if err != nil { + if test.expectedErr != "" { + if err.Error() != test.expectedErr { + t.Errorf("expected error '%s', got '%s'", test.expectedErr, err.Error()) + } + } else { + t.Errorf("unexpected error: '%s'", err.Error()) + } + } else { + if test.expectedErr != "" { + t.Errorf("expected error '%s', got no error", test.expectedErr) + } else { + if r.Timestamp.Round(time.Second).String() != test.expected.Timestamp.Round(time.Second).String() { + t.Errorf("expected timestamp '%s', got '%s'", test.expected.Timestamp, r.Timestamp) + } + if r.Hostname != test.expected.Hostname { + t.Errorf("expected hostname '%s', got '%s'", test.expected.Hostname, r.Hostname) + } + if r.Tag != test.expected.Tag { + t.Errorf("expected tag '%s', got '%s'", test.expected.Tag, r.Tag) + } + if r.PID != test.expected.PID { + t.Errorf("expected pid '%s', got '%s'", test.expected.PID, r.PID) + } + if r.Message != test.expected.Message { + t.Errorf("expected message '%s', got '%s'", test.expected.Message, r.Message) + } + if r.PRI != test.expected.PRI { + t.Errorf("expected pri '%d', got '%d'", test.expected.PRI, r.PRI) + } + if r.MsgID != test.expected.MsgID { + t.Errorf("expected msgid '%s', got '%s'", test.expected.MsgID, r.MsgID) + } + } + } + }) + } +} diff --git a/pkg/acquisition/modules/syslog/internal/parser/rfc5424/perf_test.go b/pkg/acquisition/modules/syslog/internal/parser/rfc5424/perf_test.go new file mode 100644 index 000000000..a86c17e8d --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/rfc5424/perf_test.go @@ -0,0 +1,103 @@ +package rfc5424 + +import "testing" + +var e error + +const BOM = "\xEF\xBB\xBF" + +//Test case are from https://github.com/influxdata/go-syslog (the parser we used previously) + +func BenchmarkParse(b *testing.B) { + tests := []struct { + label string + input []byte + }{ + { + label: "[no] empty input", + input: []byte(``), + }, + { + label: "[no] multiple syslog messages on multiple lines", + input: []byte("<1>1 - - - - - -\x0A<2>1 - - - - - -"), + }, + { + label: "[no] impossible timestamp", + input: []byte(`<101>11 2003-09-31T22:14:15.003Z`), + }, + { + label: "[no] malformed structured data", + input: []byte("<1>1 - - - - - X"), + }, + { + label: "[no] with duplicated structured data id", + input: []byte("<165>3 2003-10-11T22:14:15.003Z example.com evnts - ID27 [id1][id1]"), + }, + { + label: "[ok] minimal", + input: []byte(`<1>1 - - - - - -`), + }, + { + label: "[ok] average message", + input: []byte(`<29>1 2016-02-21T04:32:57+00:00 web1 someservice - - [origin x-service="someservice"][meta sequenceId="14125553"] 127.0.0.1 - - 1456029177 "GET /v1/ok HTTP/1.1" 200 145 "-" "hacheck 0.9.0" 24306 127.0.0.1:40124 575`), + }, + { + label: "[ok] complicated message", + input: []byte(`<78>1 2016-01-15T00:04:01Z host1 CROND 10391 - [meta sequenceId="29" sequenceBlah="foo"][my key="value"] some_message`), + }, + { + label: "[ok] very long message", + input: []byte(`<190>1 2016-02-21T01:19:11+00:00 batch6sj - - - [meta sequenceId="21881798" x-group="37051387"][origin x-service="tracking"] metascutellar conversationalist nephralgic exogenetic graphy streng outtaken acouasm amateurism prenotice Lyonese bedull antigrammatical diosphenol gastriloquial bayoneteer sweetener naggy roughhouser dighter addend sulphacid uneffectless ferroprussiate reveal Mazdaist plaudite Australasian distributival wiseman rumness Seidel topazine shahdom sinsion mesmerically pinguedinous ophthalmotonometer scuppler wound eciliate expectedly carriwitchet dictatorialism bindweb pyelitic idic atule kokoon poultryproof rusticial seedlip nitrosate splenadenoma holobenthic uneternal Phocaean epigenic doubtlessly indirection torticollar robomb adoptedly outspeak wappenschawing talalgia Goop domitic savola unstrafed carded unmagnified mythologically orchester obliteration imperialine undisobeyed galvanoplastical cycloplegia quinquennia foremean umbonal marcgraviaceous happenstance theoretical necropoles wayworn Igbira pseudoangelic raising unfrounced lamasary centaurial Japanolatry microlepidoptera`), + }, + { + label: "[ok] all max length and complete", + input: []byte(`<191>999 2018-12-31T23:59:59.999999-23:59 abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabc abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdef abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzab abcdefghilmnopqrstuvzabcdefghilm [an@id key1="val1" key2="val2"][another@id key1="val1"] Some message "GET"`), + }, + { + label: "[ok] all max length except structured data and message", + input: []byte(`<191>999 2018-12-31T23:59:59.999999-23:59 abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabc abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdef abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzab abcdefghilmnopqrstuvzabcdefghilm -`), + }, + { + label: "[ok] minimal with message containing newline", + input: []byte("<1>1 - - - - - - x\x0Ay"), + }, + { + label: "[ok] w/o procid, w/o structured data, with message starting with BOM", + input: []byte("<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - " + BOM + "'su root' failed for lonvick on /dev/pts/8"), + }, + { + label: "[ok] minimal with UTF-8 message", + input: []byte("<0>1 - - - - - - ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑"), + }, + { + label: "[ok] minimal with UTF-8 message starting with BOM", + input: []byte("<0>1 - - - - - - " + BOM + "⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑"), + }, + { + label: "[ok] with structured data id, w/o structured data params", + input: []byte(`<29>50 2016-01-15T01:00:43Z hn S - - [my@id]`), + }, + { + label: "[ok] with multiple structured data", + input: []byte(`<29>50 2016-01-15T01:00:43Z hn S - - [my@id1 k="v"][my@id2 c="val"]`), + }, + { + label: "[ok] with escaped backslash within structured data param value, with message", + input: []byte(`<29>50 2016-01-15T01:00:43Z hn S - - [meta es="\\valid"] 1452819643`), + }, + { + label: "[ok] with UTF-8 structured data param value, with message", + input: []byte(`<78>1 2016-01-15T00:04:01+00:00 host1 CROND 10391 - [sdid x="⌘"] some_message`), + }, + } + var err error + for _, test := range tests { + b.Run(test.label, func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := NewRFC5424Parser() + err = r.Parse(test.input) + } + }) + } + e = err +} diff --git a/pkg/acquisition/modules/syslog/internal/parser/utils/utils.go b/pkg/acquisition/modules/syslog/internal/parser/utils/utils.go new file mode 100644 index 000000000..8fe717a6a --- /dev/null +++ b/pkg/acquisition/modules/syslog/internal/parser/utils/utils.go @@ -0,0 +1,76 @@ +package utils + +import "net" + +func isValidIP(ip string) bool { + return net.ParseIP(ip) != nil +} + +func IsAlphaNumeric(c byte) bool { + return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' +} + +//This function is lifted from go source +//See https://github.com/golang/go/blob/master/src/net/dnsclient.go#L75 +func isValidHostname(s string) bool { + // The root domain name is valid. See golang.org/issue/45715. + if s == "." { + return true + } + + // See RFC 1035, RFC 3696. + // Presentation format has dots before every label except the first, and the + // terminal empty label is optional here because we assume fully-qualified + // (absolute) input. We must therefore reserve space for the first and last + // labels' length octets in wire format, where they are necessary and the + // maximum total length is 255. + // So our _effective_ maximum is 253, but 254 is not rejected if the last + // character is a dot. + l := len(s) + if l == 0 || l > 254 || l == 254 && s[l-1] != '.' { + return false + } + + last := byte('.') + nonNumeric := false // true once we've seen a letter or hyphen + partlen := 0 + for i := 0; i < len(s); i++ { + c := s[i] + switch { + default: + return false + case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_': + nonNumeric = true + partlen++ + case '0' <= c && c <= '9': + // fine + partlen++ + case c == '-': + // Byte before dash cannot be dot. + if last == '.' { + return false + } + partlen++ + nonNumeric = true + case c == '.': + // Byte before dot cannot be dot, dash. + if last == '.' || last == '-' { + return false + } + if partlen > 63 || partlen == 0 { + return false + } + partlen = 0 + } + last = c + } + if last == '-' || partlen > 63 { + return false + } + + return nonNumeric +} + +func IsValidHostnameOrIP(hostname string) bool { + return isValidIP(hostname) || isValidHostname(hostname) +} diff --git a/pkg/acquisition/modules/syslog/internal/syslogserver.go b/pkg/acquisition/modules/syslog/internal/server/syslogserver.go similarity index 100% rename from pkg/acquisition/modules/syslog/internal/syslogserver.go rename to pkg/acquisition/modules/syslog/internal/server/syslogserver.go diff --git a/pkg/acquisition/modules/syslog/syslog.go b/pkg/acquisition/modules/syslog/syslog.go index 65bb76aff..2cd0083b7 100644 --- a/pkg/acquisition/modules/syslog/syslog.go +++ b/pkg/acquisition/modules/syslog/syslog.go @@ -3,15 +3,15 @@ package syslogacquisition import ( "fmt" "net" - "strconv" + "strings" "time" "github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration" - syslogserver "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal" + "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/rfc3164" + "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/rfc5424" + syslogserver "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/server" leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket" "github.com/crowdsecurity/crowdsec/pkg/types" - "github.com/influxdata/go-syslog/v3/rfc3164" - "github.com/influxdata/go-syslog/v3/rfc5424" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" @@ -133,49 +133,33 @@ func (s *SyslogSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) return nil } -func (s *SyslogSource) buildLogFromSyslog(ts *time.Time, hostname *string, - appname *string, pid *string, msg *string) (string, error) { +func (s *SyslogSource) buildLogFromSyslog(ts time.Time, hostname string, + appname string, pid string, msg string) string { ret := "" - if msg == nil { - return "", errors.Errorf("missing message field in syslog message") - } - if ts != nil { + if !ts.IsZero() { ret += ts.Format("Jan 2 15:04:05") } else { - s.logger.Tracef("%s - missing TS", *msg) + s.logger.Tracef("%s - missing TS", msg) ret += time.Now().UTC().Format("Jan 2 15:04:05") } - if hostname != nil { - ret += " " + *hostname + if hostname != "" { + ret += " " + hostname } else { - s.logger.Tracef("%s - missing host", *msg) + s.logger.Tracef("%s - missing host", msg) ret += " unknownhost" } - if appname != nil { - ret += " " + *appname - } else { - return "", errors.Errorf("missing appname field in syslog message") + if appname != "" { + ret += " " + appname } - if pid != nil { - /* - !!! ugly hack !!! - Due to a bug in the syslog parser we use (https://github.com/influxdata/go-syslog/issues/31), - the ProcID field will contain garbage if the message as a ] anywhere in it. - Assume that a correctly formatted ProcID only contains number, and if this is not the case, set it to an arbitrary value - */ - _, err := strconv.Atoi(*pid) - if err != nil { - ret += "[1]: " - } else { - ret += "[" + *pid + "]: " - } + if pid != "" { + ret += "[" + pid + "]: " } else { ret += ": " } - if msg != nil { - ret += *msg + if msg != "" { + ret += msg } - return ret, nil + return ret } @@ -199,38 +183,24 @@ func (s *SyslogSource) handleSyslogMsg(out chan types.Event, t *tomb.Tomb, c cha logger := s.logger.WithField("client", syslogLine.Client) logger.Tracef("raw: %s", syslogLine) linesReceived.With(prometheus.Labels{"source": syslogLine.Client}).Inc() - p := rfc5424.NewParser() - m, err := p.Parse(syslogLine.Message) + p := rfc3164.NewRFC3164Parser(rfc3164.WithCurrentYear()) + err := p.Parse(syslogLine.Message) if err != nil { - logger.Debugf("could not parse as RFC5424 (%s)", err) - p = rfc3164.NewParser(rfc3164.WithYear(rfc3164.CurrentYear{})) - m, err = p.Parse(syslogLine.Message) + logger.Debugf("could not parse as RFC3164 (%s)", err) + p2 := rfc5424.NewRFC5424Parser() + err = p2.Parse(syslogLine.Message) if err != nil { logger.Errorf("could not parse message: %s", err) - logger.Debugf("could not parse as RFC3164 (%s) : %s", err, syslogLine.Message) + logger.Debugf("could not parse as RFC5424 (%s) : %s", err, syslogLine.Message) continue } - msg := m.(*rfc3164.SyslogMessage) - line, err = s.buildLogFromSyslog(msg.Timestamp, msg.Hostname, msg.Appname, msg.ProcID, msg.Message) - if err != nil { - logger.Debugf("could not parse as RFC3164 (%s) : %s", err, syslogLine.Message) - logger.Error(err) - continue - } - linesParsed.With(prometheus.Labels{"source": syslogLine.Client, - "type": "RFC3164"}).Inc() + line = s.buildLogFromSyslog(p2.Timestamp, p2.Hostname, p2.Tag, p2.PID, p2.Message) } else { - msg := m.(*rfc5424.SyslogMessage) - line, err = s.buildLogFromSyslog(msg.Timestamp, msg.Hostname, msg.Appname, msg.ProcID, msg.Message) - if err != nil { - log.Debugf("could not parse message as RFC5424 (%s) : %s", err, syslogLine.Message) - logger.Error(err) - continue - } - linesParsed.With(prometheus.Labels{"source": syslogLine.Client, - "type": "RFC5424"}).Inc() - + line = s.buildLogFromSyslog(p.Timestamp, p.Hostname, p.Tag, p.PID, p.Message) } + + line = strings.TrimSuffix(line, "\n") + l := types.Line{} l.Raw = line l.Module = s.GetName() diff --git a/pkg/acquisition/modules/syslog/syslog_test.go b/pkg/acquisition/modules/syslog/syslog_test.go index 49ecdce5c..1eee8dfe7 100644 --- a/pkg/acquisition/modules/syslog/syslog_test.go +++ b/pkg/acquisition/modules/syslog/syslog_test.go @@ -67,27 +67,36 @@ func writeToSyslog(logs []string) { return } for _, log := range logs { - fmt.Fprint(conn, log) + n, err := fmt.Fprint(conn, log) + if err != nil { + fmt.Printf("could not write to syslog server : %s", err) + return + } + if n != len(log) { + fmt.Printf("could not write to syslog server : %s", err) + return + } } } func TestStreamingAcquisition(t *testing.T) { tests := []struct { + name string config string expectedErr string logs []string expectedLines int }{ { - config: ` -source: syslog + name: "invalid msgs", + config: `source: syslog listen_port: 4242 listen_addr: 127.0.0.1`, logs: []string{"foobar", "bla", "pouet"}, }, { - config: ` -source: syslog + name: "RFC5424", + config: `source: syslog listen_port: 4242 listen_addr: 127.0.0.1`, expectedLines: 2, @@ -95,8 +104,8 @@ listen_addr: 127.0.0.1`, `<13>1 2021-05-18T12:12:37.560695+02:00 mantis sshd 49340 - [timeQuality isSynced="0" tzKnown="1"] blabla2[foobar]`}, }, { - config: ` -source: syslog + name: "RFC3164", + config: `source: syslog listen_port: 4242 listen_addr: 127.0.0.1`, expectedLines: 3, @@ -108,43 +117,54 @@ listen_addr: 127.0.0.1`, } if runtime.GOOS != "windows" { tests = append(tests, struct { + name string config string expectedErr string logs []string expectedLines int }{ + name: "privileged port", config: `source: syslog`, expectedErr: "could not start syslog server: could not listen on port 514: listen udp 127.0.0.1:514: bind: permission denied", }) } for _, ts := range tests { - subLogger := log.WithFields(log.Fields{ - "type": "syslog", - }) - s := SyslogSource{} - _ = s.Configure([]byte(ts.config), subLogger) - tomb := tomb.Tomb{} - out := make(chan types.Event) - err := s.StreamingAcquisition(out, &tomb) - cstest.AssertErrorContains(t, err, ts.expectedErr) - if err != nil { - continue - } - - actualLines := 0 - go writeToSyslog(ts.logs) - READLOOP: - for { - select { - case <-out: - actualLines++ - case <-time.After(2 * time.Second): - break READLOOP + t.Run(ts.name, func(t *testing.T) { + subLogger := log.WithFields(log.Fields{ + "type": "syslog", + }) + s := SyslogSource{} + err := s.Configure([]byte(ts.config), subLogger) + if err != nil { + t.Fatalf("could not configure syslog source : %s", err) } - } - assert.Equal(t, ts.expectedLines, actualLines) - tomb.Kill(nil) - tomb.Wait() + tomb := tomb.Tomb{} + out := make(chan types.Event) + err = s.StreamingAcquisition(out, &tomb) + cstest.AssertErrorContains(t, err, ts.expectedErr) + if ts.expectedErr != "" { + return + } + if err != nil && ts.expectedErr == "" { + t.Fatalf("unexpected error while starting syslog server: %s", err) + return + } + + actualLines := 0 + go writeToSyslog(ts.logs) + READLOOP: + for { + select { + case <-out: + actualLines++ + case <-time.After(2 * time.Second): + break READLOOP + } + } + assert.Equal(t, ts.expectedLines, actualLines) + tomb.Kill(nil) + tomb.Wait() + }) } }