fluent.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. package fluent
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "math"
  7. "net"
  8. "os"
  9. "reflect"
  10. "strconv"
  11. "sync"
  12. "time"
  13. "bytes"
  14. "encoding/base64"
  15. "encoding/binary"
  16. "math/rand"
  17. "github.com/tinylib/msgp/msgp"
  18. )
// Default values substituted for Config fields that are left at their zero
// value when a logger is created.
const (
	// defaultHost is the host dialed for the "tcp" network.
	defaultHost = "127.0.0.1"
	// defaultNetwork is the transport used when none is configured.
	defaultNetwork = "tcp"
	// defaultSocketPath is the address dialed for the "unix" network.
	defaultSocketPath = ""
	// defaultPort is the port dialed for the "tcp" network.
	defaultPort = 24224
	// defaultTimeout is the dial timeout handed to net.Dialer by New.
	defaultTimeout      = 3 * time.Second
	defaultWriteTimeout = time.Duration(0) // Write() will not time out
	// defaultBufferLimit is the capacity (in messages) of the async queue.
	defaultBufferLimit = 8 * 1024
	// defaultRetryWait is the base reconnect wait, in milliseconds.
	defaultRetryWait = 500
	// defaultMaxRetryWait is the upper bound on a reconnect wait, in
	// milliseconds.
	defaultMaxRetryWait = 60000
	// defaultMaxRetry is the number of attempts write makes per message.
	defaultMaxRetry = 13
	// defaultReconnectWaitIncreRate is the base of the exponential factor
	// applied to the reconnect wait on each successive attempt.
	defaultReconnectWaitIncreRate = 1.5
	// Default sub-second precision value to false since it is only compatible
	// with fluentd versions v0.14 and above.
	defaultSubSecondPrecision = false
)
// Config holds the settings of a Fluent logger. Fields left at their zero
// value are replaced by the package defaults when the logger is created.
type Config struct {
	// FluentPort is the port dialed when FluentNetwork is "tcp".
	FluentPort int `json:"fluent_port"`
	// FluentHost is the host dialed when FluentNetwork is "tcp".
	FluentHost string `json:"fluent_host"`
	// FluentNetwork selects the transport: "tcp" or "unix". Any other value
	// makes connecting fail with *ErrUnknownNetwork.
	FluentNetwork string `json:"fluent_network"`
	// FluentSocketPath is the address dialed when FluentNetwork is "unix".
	FluentSocketPath string `json:"fluent_socket_path"`
	// Timeout is the dial timeout given to the net.Dialer created by New.
	Timeout time.Duration `json:"timeout"`
	// WriteTimeout bounds each write to the connection; zero disables the
	// write deadline.
	WriteTimeout time.Duration `json:"write_timeout"`
	// BufferLimit is the capacity, in messages, of the queue used in Async
	// mode. Posting to a full queue returns an error.
	BufferLimit int `json:"buffer_limit"`
	// RetryWait is the base wait between reconnect attempts, in milliseconds.
	RetryWait int `json:"retry_wait"`
	// MaxRetry is the maximum number of attempts made to send one message.
	MaxRetry int `json:"max_retry"`
	// MaxRetryWait caps the wait between reconnect attempts, in milliseconds.
	MaxRetryWait int `json:"max_retry_wait"`
	// TagPrefix, when non-empty, is prepended to every tag followed by ".".
	TagPrefix string `json:"tag_prefix"`
	// Async queues messages and sends them from a background goroutine
	// instead of writing them synchronously in the posting call.
	Async bool `json:"async"`
	// ForceStopAsyncSend makes Close signal the background goroutine to
	// discard events that are still queued rather than send them.
	ForceStopAsyncSend bool `json:"force_stop_async_send"`
	// Deprecated: Use Async instead
	AsyncConnect bool `json:"async_connect"`
	// MarshalAsJSON encodes messages as JSON instead of msgpack.
	MarshalAsJSON bool `json:"marshal_as_json"`
	// Sub-second precision timestamps are only possible for those using fluentd
	// v0.14+ and serializing their messages with msgpack.
	SubSecondPrecision bool `json:"sub_second_precision"`
	// RequestAck sends the chunk option with a unique ID. The server will
	// respond with an acknowledgement. This option improves the reliability
	// of the message transmission.
	RequestAck bool `json:"request_ack"`
}
  60. type ErrUnknownNetwork struct {
  61. network string
  62. }
  63. func (e *ErrUnknownNetwork) Error() string {
  64. return "unknown network " + e.network
  65. }
  66. func NewErrUnknownNetwork(network string) error {
  67. return &ErrUnknownNetwork{network}
  68. }
// msgToSend is one encoded message ready to be written to the connection.
type msgToSend struct {
	// data is the serialized payload written to the connection as-is.
	data []byte
	// ack is the chunk ID the server is expected to echo back; it is empty
	// when no acknowledgement was requested.
	ack string
}
// Fluent is a logger that sends encoded events over a network connection,
// either synchronously or through a background goroutine (Async mode).
type Fluent struct {
	Config

	// dialer opens the connection in connect.
	dialer dialer
	// stopRunning carries the signal telling run to discard queued events.
	stopRunning chan bool
	// pending is the queue of messages awaiting the background sender.
	pending chan *msgToSend
	// pendingMutex guards chanClosed so that appendBuffer never sends on
	// pending after Close has marked it closed.
	pendingMutex sync.RWMutex
	// chanClosed is set by Close before pending is closed.
	chanClosed bool
	// wg lets Close wait for the run goroutine to exit.
	wg sync.WaitGroup

	// muconn is held while conn is established in write and torn down in
	// close.
	muconn sync.Mutex
	// conn is the current connection, or nil when disconnected.
	conn net.Conn
}
  84. // New creates a new Logger.
  85. func New(config Config) (*Fluent, error) {
  86. if config.Timeout == 0 {
  87. config.Timeout = defaultTimeout
  88. }
  89. return newWithDialer(config, &net.Dialer{
  90. Timeout: config.Timeout,
  91. })
  92. }
// dialer is the subset of *net.Dialer the logger relies on to open its
// connection: New passes a *net.Dialer, and newWithDialer accepts any
// implementation.
type dialer interface {
	// Dial has the same shape as (*net.Dialer).Dial; connect calls it with
	// the configured network as the first argument and the address as the
	// second.
	Dial(string, string) (net.Conn, error)
}
  96. func newWithDialer(config Config, d dialer) (f *Fluent, err error) {
  97. if config.FluentNetwork == "" {
  98. config.FluentNetwork = defaultNetwork
  99. }
  100. if config.FluentHost == "" {
  101. config.FluentHost = defaultHost
  102. }
  103. if config.FluentPort == 0 {
  104. config.FluentPort = defaultPort
  105. }
  106. if config.FluentSocketPath == "" {
  107. config.FluentSocketPath = defaultSocketPath
  108. }
  109. if config.WriteTimeout == 0 {
  110. config.WriteTimeout = defaultWriteTimeout
  111. }
  112. if config.BufferLimit == 0 {
  113. config.BufferLimit = defaultBufferLimit
  114. }
  115. if config.RetryWait == 0 {
  116. config.RetryWait = defaultRetryWait
  117. }
  118. if config.MaxRetry == 0 {
  119. config.MaxRetry = defaultMaxRetry
  120. }
  121. if config.MaxRetryWait == 0 {
  122. config.MaxRetryWait = defaultMaxRetryWait
  123. }
  124. if config.AsyncConnect {
  125. fmt.Fprintf(os.Stderr, "fluent#New: AsyncConnect is now deprecated, please use Async instead")
  126. config.Async = config.Async || config.AsyncConnect
  127. }
  128. if config.Async {
  129. f = &Fluent{
  130. Config: config,
  131. dialer: d,
  132. pending: make(chan *msgToSend, config.BufferLimit),
  133. pendingMutex: sync.RWMutex{},
  134. stopRunning: make(chan bool, 1),
  135. }
  136. f.wg.Add(1)
  137. go f.run()
  138. } else {
  139. f = &Fluent{
  140. Config: config,
  141. dialer: d,
  142. }
  143. err = f.connect()
  144. }
  145. return
  146. }
  147. // Post writes the output for a logging event.
  148. //
  149. // Examples:
  150. //
  151. // // send map[string]
  152. // mapStringData := map[string]string{
  153. // "foo": "bar",
  154. // }
  155. // f.Post("tag_name", mapStringData)
  156. //
  157. // // send message with specified time
  158. // mapStringData := map[string]string{
  159. // "foo": "bar",
  160. // }
  161. // tm := time.Now()
  162. // f.PostWithTime("tag_name", tm, mapStringData)
  163. //
  164. // // send struct
  165. // structData := struct {
  166. // Name string `msg:"name"`
  167. // } {
  168. // "john smith",
  169. // }
  170. // f.Post("tag_name", structData)
  171. //
  172. func (f *Fluent) Post(tag string, message interface{}) error {
  173. timeNow := time.Now()
  174. return f.PostWithTime(tag, timeNow, message)
  175. }
  176. func (f *Fluent) PostWithTime(tag string, tm time.Time, message interface{}) error {
  177. if len(f.TagPrefix) > 0 {
  178. tag = f.TagPrefix + "." + tag
  179. }
  180. if m, ok := message.(msgp.Marshaler); ok {
  181. return f.EncodeAndPostData(tag, tm, m)
  182. }
  183. msg := reflect.ValueOf(message)
  184. msgtype := msg.Type()
  185. if msgtype.Kind() == reflect.Struct {
  186. // message should be tagged by "codec" or "msg"
  187. kv := make(map[string]interface{})
  188. fields := msgtype.NumField()
  189. for i := 0; i < fields; i++ {
  190. field := msgtype.Field(i)
  191. name := field.Name
  192. if n1 := field.Tag.Get("msg"); n1 != "" {
  193. name = n1
  194. } else if n2 := field.Tag.Get("codec"); n2 != "" {
  195. name = n2
  196. }
  197. kv[name] = msg.FieldByIndex(field.Index).Interface()
  198. }
  199. return f.EncodeAndPostData(tag, tm, kv)
  200. }
  201. if msgtype.Kind() != reflect.Map {
  202. return errors.New("fluent#PostWithTime: message must be a map")
  203. } else if msgtype.Key().Kind() != reflect.String {
  204. return errors.New("fluent#PostWithTime: map keys must be strings")
  205. }
  206. kv := make(map[string]interface{})
  207. for _, k := range msg.MapKeys() {
  208. kv[k.String()] = msg.MapIndex(k).Interface()
  209. }
  210. return f.EncodeAndPostData(tag, tm, kv)
  211. }
  212. func (f *Fluent) EncodeAndPostData(tag string, tm time.Time, message interface{}) error {
  213. var msg *msgToSend
  214. var err error
  215. if msg, err = f.EncodeData(tag, tm, message); err != nil {
  216. return fmt.Errorf("fluent#EncodeAndPostData: can't convert '%#v' to msgpack:%v", message, err)
  217. }
  218. return f.postRawData(msg)
  219. }
  220. // Deprecated: Use EncodeAndPostData instead
  221. func (f *Fluent) PostRawData(msg *msgToSend) {
  222. f.postRawData(msg)
  223. }
  224. func (f *Fluent) postRawData(msg *msgToSend) error {
  225. if f.Config.Async {
  226. return f.appendBuffer(msg)
  227. }
  228. // Synchronous write
  229. return f.write(msg)
  230. }
  231. // For sending forward protocol adopted JSON
  232. type MessageChunk struct {
  233. message Message
  234. }
  235. // Golang default marshaler does not support
  236. // ["value", "value2", {"key":"value"}] style marshaling.
  237. // So, it should write JSON marshaler by hand.
  238. func (chunk *MessageChunk) MarshalJSON() ([]byte, error) {
  239. data, err := json.Marshal(chunk.message.Record)
  240. if err != nil {
  241. return nil, err
  242. }
  243. option, err := json.Marshal(chunk.message.Option)
  244. if err != nil {
  245. return nil, err
  246. }
  247. return []byte(fmt.Sprintf("[\"%s\",%d,%s,%s]", chunk.message.Tag,
  248. chunk.message.Time, data, option)), err
  249. }
  250. // getUniqueID returns a base64 encoded unique ID that can be used for chunk/ack
  251. // mechanism, see
  252. // https://github.com/fluent/fluentd/wiki/Forward-Protocol-Specification-v1#option
  253. func getUniqueID(timeUnix int64) (string, error) {
  254. buf := bytes.NewBuffer(nil)
  255. enc := base64.NewEncoder(base64.StdEncoding, buf)
  256. if err := binary.Write(enc, binary.LittleEndian, timeUnix); err != nil {
  257. enc.Close()
  258. return "", err
  259. }
  260. if err := binary.Write(enc, binary.LittleEndian, rand.Uint64()); err != nil {
  261. enc.Close()
  262. return "", err
  263. }
  264. // encoder needs to be closed before buf.String(), defer does not work
  265. // here
  266. enc.Close()
  267. return buf.String(), nil
  268. }
  269. func (f *Fluent) EncodeData(tag string, tm time.Time, message interface{}) (msg *msgToSend, err error) {
  270. option := make(map[string]string)
  271. msg = &msgToSend{}
  272. timeUnix := tm.Unix()
  273. if f.Config.RequestAck {
  274. var err error
  275. msg.ack, err = getUniqueID(timeUnix)
  276. if err != nil {
  277. return nil, err
  278. }
  279. option["chunk"] = msg.ack
  280. }
  281. if f.Config.MarshalAsJSON {
  282. m := Message{Tag: tag, Time: timeUnix, Record: message, Option: option}
  283. chunk := &MessageChunk{message: m}
  284. msg.data, err = json.Marshal(chunk)
  285. } else if f.Config.SubSecondPrecision {
  286. m := &MessageExt{Tag: tag, Time: EventTime(tm), Record: message, Option: option}
  287. msg.data, err = m.MarshalMsg(nil)
  288. } else {
  289. m := &Message{Tag: tag, Time: timeUnix, Record: message, Option: option}
  290. msg.data, err = m.MarshalMsg(nil)
  291. }
  292. return
  293. }
  294. // Close closes the connection, waiting for pending logs to be sent
  295. func (f *Fluent) Close() (err error) {
  296. defer f.close(f.conn)
  297. if f.Config.Async {
  298. f.pendingMutex.Lock()
  299. if f.chanClosed {
  300. f.pendingMutex.Unlock()
  301. return nil
  302. }
  303. f.chanClosed = true
  304. f.pendingMutex.Unlock()
  305. if f.Config.ForceStopAsyncSend {
  306. f.stopRunning <- true
  307. close(f.stopRunning)
  308. }
  309. close(f.pending)
  310. f.wg.Wait()
  311. }
  312. return nil
  313. }
  314. // appendBuffer appends data to buffer with lock.
  315. func (f *Fluent) appendBuffer(msg *msgToSend) error {
  316. f.pendingMutex.RLock()
  317. defer f.pendingMutex.RUnlock()
  318. if f.chanClosed {
  319. return fmt.Errorf("fluent#appendBuffer: Logger already closed")
  320. }
  321. select {
  322. case f.pending <- msg:
  323. default:
  324. return fmt.Errorf("fluent#appendBuffer: Buffer full, limit %v", f.Config.BufferLimit)
  325. }
  326. return nil
  327. }
  328. // close closes the connection.
  329. func (f *Fluent) close(c net.Conn) {
  330. f.muconn.Lock()
  331. if f.conn != nil && f.conn == c {
  332. f.conn.Close()
  333. f.conn = nil
  334. }
  335. f.muconn.Unlock()
  336. }
  337. // connect establishes a new connection using the specified transport.
  338. func (f *Fluent) connect() (err error) {
  339. switch f.Config.FluentNetwork {
  340. case "tcp":
  341. f.conn, err = f.dialer.Dial(
  342. f.Config.FluentNetwork,
  343. f.Config.FluentHost+":"+strconv.Itoa(f.Config.FluentPort))
  344. case "unix":
  345. f.conn, err = f.dialer.Dial(
  346. f.Config.FluentNetwork,
  347. f.Config.FluentSocketPath)
  348. default:
  349. err = NewErrUnknownNetwork(f.Config.FluentNetwork)
  350. }
  351. return err
  352. }
  353. func (f *Fluent) run() {
  354. drainEvents := false
  355. var emitEventDrainMsg sync.Once
  356. for {
  357. select {
  358. case entry, ok := <-f.pending:
  359. if !ok {
  360. f.wg.Done()
  361. return
  362. }
  363. if drainEvents {
  364. emitEventDrainMsg.Do(func() { fmt.Fprintf(os.Stderr, "[%s] Discarding queued events...\n", time.Now().Format(time.RFC3339)) })
  365. continue
  366. }
  367. err := f.write(entry)
  368. if err != nil {
  369. fmt.Fprintf(os.Stderr, "[%s] Unable to send logs to fluentd, reconnecting...\n", time.Now().Format(time.RFC3339))
  370. }
  371. }
  372. select {
  373. case stopRunning, ok := <-f.stopRunning:
  374. if stopRunning || !ok {
  375. drainEvents = true
  376. }
  377. default:
  378. }
  379. }
  380. }
  381. func e(x, y float64) int {
  382. return int(math.Pow(x, y))
  383. }
  384. func (f *Fluent) write(msg *msgToSend) error {
  385. var c net.Conn
  386. for i := 0; i < f.Config.MaxRetry; i++ {
  387. c = f.conn
  388. // Connect if needed
  389. if c == nil {
  390. f.muconn.Lock()
  391. if f.conn == nil {
  392. err := f.connect()
  393. if err != nil {
  394. f.muconn.Unlock()
  395. if _, ok := err.(*ErrUnknownNetwork); ok {
  396. // do not retry on unknown network error
  397. break
  398. }
  399. waitTime := f.Config.RetryWait * e(defaultReconnectWaitIncreRate, float64(i-1))
  400. if waitTime > f.Config.MaxRetryWait {
  401. waitTime = f.Config.MaxRetryWait
  402. }
  403. time.Sleep(time.Duration(waitTime) * time.Millisecond)
  404. continue
  405. }
  406. }
  407. c = f.conn
  408. f.muconn.Unlock()
  409. }
  410. // We're connected, write msg
  411. t := f.Config.WriteTimeout
  412. if time.Duration(0) < t {
  413. c.SetWriteDeadline(time.Now().Add(t))
  414. } else {
  415. c.SetWriteDeadline(time.Time{})
  416. }
  417. _, err := c.Write(msg.data)
  418. if err != nil {
  419. f.close(c)
  420. } else {
  421. // Acknowledgment check
  422. if msg.ack != "" {
  423. resp := &AckResp{}
  424. if f.Config.MarshalAsJSON {
  425. dec := json.NewDecoder(c)
  426. err = dec.Decode(resp)
  427. } else {
  428. r := msgp.NewReader(c)
  429. err = resp.DecodeMsg(r)
  430. }
  431. if err != nil || resp.Ack != msg.ack {
  432. f.close(c)
  433. continue
  434. }
  435. }
  436. return err
  437. }
  438. }
  439. return fmt.Errorf("fluent#write: failed to reconnect, max retry: %v", f.Config.MaxRetry)
  440. }