diff --git a/pkg/beam/data/data.go b/pkg/beam/data/data.go new file mode 100644 index 0000000000..8ebee94a32 --- /dev/null +++ b/pkg/beam/data/data.go @@ -0,0 +1,112 @@ +package data + +import ( + "fmt" + "strings" + "strconv" +) + +func Encode(obj map[string][]string) string { + var msg string + msg += encodeHeader(0) + for k, values := range obj { + msg += encodeNamedList(k, values) + } + return msg +} + +func encodeHeader(msgtype int) string { + return fmt.Sprintf("%03.3d;", msgtype) +} + +func encodeString(s string) string { + return fmt.Sprintf("%d:%s,", len(s), s) +} + +func encodeList(l []string) string { + values := make([]string, 0, len(l)) + for _, s := range l { + values = append(values, encodeString(s)) + } + return encodeString(strings.Join(values, "")) +} + +func encodeNamedList(name string, l []string) string { + return encodeString(name) + encodeList(l) +} + +func Decode(msg string) (map[string][]string, error) { + msgtype, skip, err := decodeHeader(msg) + if err != nil { + return nil, err + } + if msgtype != 0 { + // FIXME: use special error type so the caller can easily ignore + return nil, fmt.Errorf("unknown message type: %d", msgtype) + } + msg = msg[skip:] + obj := make(map[string][]string) + for len(msg) > 0 { + k, skip, err := decodeString(msg) + if err != nil { + return nil, err + } + msg = msg[skip:] + values, skip, err := decodeList(msg) + if err != nil { + return nil, err + } + msg = msg[skip:] + obj[k] = values + } + return obj, nil +} + +func decodeList(msg string) ([]string, int, error) { + blob, skip, err := decodeString(msg) + if err != nil { + return nil, 0, err + } + var l []string + for len(blob) > 0 { + v, skipv, err := decodeString(blob) + if err != nil { + return nil, 0, err + } + l = append(l, v) + blob = blob[skipv:] + } + return l, skip, nil +} + +func decodeString(msg string) (string, int, error) { + parts := strings.SplitN(msg, ":", 2) + if len(parts) != 2 { + return "", 0, fmt.Errorf("invalid format: no column") + } + var length int + if l, err := strconv.ParseUint(parts[0], 10, 64); err != nil { + return "", 0, err + } else { + length = int(l) + } + if len(parts[1]) < length + 1 { + return "", 0, fmt.Errorf("message is less than %d bytes", length) + } + payload := parts[1][:length + 1] + if payload[length] != ',' { + return "", 0, fmt.Errorf("message is not comma-terminated") + } + return payload[:length], len(parts[0]) + length + 1, nil +} + +func decodeHeader(msg string) (int, int, error) { + if len(msg) < 4 { + return 0, 0, fmt.Errorf("message too small") + } + msgtype, err := strconv.ParseInt(msg[:3], 10, 32) + if err != nil { + return 0, 0, err + } + return int(msgtype), 4, nil +} diff --git a/pkg/beam/data/data_test.go b/pkg/beam/data/data_test.go new file mode 100644 index 0000000000..b6573a1f74 --- /dev/null +++ b/pkg/beam/data/data_test.go @@ -0,0 +1,91 @@ +package data + +import ( + "testing" +) + +func TestEncodeHelloWorld(t *testing.T) { + input := "hello world!" + output := encodeString(input) + expectedOutput := "12:hello world!," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncodeEmptyString(t *testing.T) { + input := "" + output := encodeString(input) + expectedOutput := "0:," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncodeEmptyList(t *testing.T) { + input := []string{} + output := encodeList(input) + expectedOutput := "0:," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncodeEmptyMap(t *testing.T) { + input := make(map[string][]string) + output := Encode(input) + expectedOutput := "000;" + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncode1Key1Value(t *testing.T) { + input := make(map[string][]string) + input["hello"] = []string{"world"} + output := Encode(input) + expectedOutput := "000;5:hello,8:5:world,," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncode1Key2Value(t *testing.T) { + input := make(map[string][]string) + input["hello"] = []string{"beautiful", "world"} + output := Encode(input) + expectedOutput := "000;5:hello,20:9:beautiful,5:world,," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncodeEmptyValue(t *testing.T) { + input := make(map[string][]string) + input["foo"] = []string{} + output := Encode(input) + expectedOutput := "000;3:foo,0:," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncodeBinaryKey(t *testing.T) { + input := make(map[string][]string) + input["foo\x00bar\x7f"] = []string{} + output := Encode(input) + expectedOutput := "000;8:foo\x00bar\x7f,0:," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} + +func TestEncodeBinaryValue(t *testing.T) { + input := make(map[string][]string) + input["foo\x00bar\x7f"] = []string{"\x01\x02\x03\x04"} + output := Encode(input) + expectedOutput := "000;8:foo\x00bar\x7f,7:4:\x01\x02\x03\x04,," + if output != expectedOutput { + t.Fatalf("'%v' != '%v'", output, expectedOutput) + } +} diff --git a/pkg/beam/data/netstring.txt b/pkg/beam/data/netstring.txt new file mode 100644 index 0000000000..17560929b6 --- /dev/null +++ b/pkg/beam/data/netstring.txt @@ -0,0 +1,92 @@ +## +## Netstrings spec copied as-is from http://cr.yp.to/proto/netstrings.txt +## + +Netstrings +D. J. Bernstein, djb@pobox.com +19970201 + + +1. Introduction + + A netstring is a self-delimiting encoding of a string. Netstrings are + very easy to generate and to parse. Any string may be encoded as a + netstring; there are no restrictions on length or on allowed bytes. + Another virtue of a netstring is that it declares the string size up + front. Thus an application can check in advance whether it has enough + space to store the entire string. + + Netstrings may be used as a basic building block for reliable network + protocols. Most high-level protocols, in effect, transmit a sequence + of strings; those strings may be encoded as netstrings and then + concatenated into a sequence of characters, which in turn may be + transmitted over a reliable stream protocol such as TCP. + + Note that netstrings can be used recursively. The result of encoding + a sequence of strings is a single string. A series of those encoded + strings may in turn be encoded into a single string. And so on. + + In this document, a string of 8-bit bytes may be written in two + different forms: as a series of hexadecimal numbers between angle + brackets, or as a sequence of ASCII characters between double quotes. + For example, <68 65 6c 6c 6f 20 77 6f 72 6c 64 21> is a string of + length 12; it is the same as the string "hello world!". + + Although this document restricts attention to strings of 8-bit bytes, + netstrings could be used with any 6-bit-or-larger character set. + + +2. Definition + + Any string of 8-bit bytes may be encoded as [len]":"[string]",". + Here [string] is the string and [len] is a nonempty sequence of ASCII + digits giving the length of [string] in decimal. The ASCII digits are + <30> for 0, <31> for 1, and so on up through <39> for 9. Extra zeros + at the front of [len] are prohibited: [len] begins with <30> exactly + when [string] is empty. + + For example, the string "hello world!" is encoded as <31 32 3a 68 + 65 6c 6c 6f 20 77 6f 72 6c 64 21 2c>, i.e., "12:hello world!,". The + empty string is encoded as "0:,". + + [len]":"[string]"," is called a netstring. [string] is called the + interpretation of the netstring. + + +3. Sample code + + The following C code starts with a buffer buf of length len and + prints it as a netstring. + + if (printf("%lu:",len) < 0) barf(); + if (fwrite(buf,1,len,stdout) < len) barf(); + if (putchar(',') < 0) barf(); + + The following C code reads a netstring and decodes it into a + dynamically allocated buffer buf of length len. + + if (scanf("%9lu",&len) < 1) barf(); /* >999999999 bytes is bad */ + if (getchar() != ':') barf(); + buf = malloc(len + 1); /* malloc(0) is not portable */ + if (!buf) barf(); + if (fread(buf,1,len,stdin) < len) barf(); + if (getchar() != ',') barf(); + + Both of these code fragments assume that the local character set is + ASCII, and that the relevant stdio streams are in binary mode. + + +4. Security considerations + + The famous Finger security hole may be blamed on Finger's use of the + CRLF encoding. In that encoding, each string is simply terminated by + CRLF. This encoding has several problems. Most importantly, it does + not declare the string size in advance. This means that a correct + CRLF parser must be prepared to ask for more and more memory as it is + reading the string. In the case of Finger, a lazy implementor found + this to be too much trouble; instead he simply declared a fixed-size + buffer and used C's gets() function. The rest is history. + + In contrast, as the above sample code shows, it is very easy to + handle netstrings without risking buffer overflow. Thus widespread + use of netstrings may improve network security.