metrics.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. package main
import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/fatih/color"
	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/prom2json"
	log "github.com/sirupsen/logrus"
	"github.com/spf13/cobra"
	"gopkg.in/yaml.v3"

	"github.com/crowdsecurity/go-cs-lib/maptools"
	"github.com/crowdsecurity/go-cs-lib/trace"
)
  20. type (
  21. statAcquis map[string]map[string]int
  22. statParser map[string]map[string]int
  23. statBucket map[string]map[string]int
  24. statWhitelist map[string]map[string]map[string]int
  25. statLapi map[string]map[string]int
  26. statLapiMachine map[string]map[string]map[string]int
  27. statLapiBouncer map[string]map[string]map[string]int
  28. statLapiDecision map[string]struct {
  29. NonEmpty int
  30. Empty int
  31. }
  32. statDecision map[string]map[string]map[string]int
  33. statAppsecEngine map[string]map[string]int
  34. statAppsecRule map[string]map[string]map[string]int
  35. statAlert map[string]int
  36. statStash map[string]struct {
  37. Type string
  38. Count int
  39. }
  40. )
  41. var (
  42. ErrMissingConfig = errors.New("prometheus section missing, can't show metrics")
  43. ErrMetricsDisabled = errors.New("prometheus is not enabled, can't show metrics")
  44. )
  45. type metricSection interface {
  46. Table(out io.Writer, noUnit bool, showEmpty bool)
  47. Description() (string, string)
  48. }
  49. type metricStore map[string]metricSection
  50. func NewMetricStore() metricStore {
  51. return metricStore{
  52. "acquisition": statAcquis{},
  53. "scenarios": statBucket{},
  54. "parsers": statParser{},
  55. "lapi": statLapi{},
  56. "lapi-machine": statLapiMachine{},
  57. "lapi-bouncer": statLapiBouncer{},
  58. "lapi-decisions": statLapiDecision{},
  59. "decisions": statDecision{},
  60. "alerts": statAlert{},
  61. "stash": statStash{},
  62. "appsec-engine": statAppsecEngine{},
  63. "appsec-rule": statAppsecRule{},
  64. "whitelists": statWhitelist{},
  65. }
  66. }
  67. func (ms metricStore) Fetch(url string) error {
  68. mfChan := make(chan *dto.MetricFamily, 1024)
  69. errChan := make(chan error, 1)
  70. // Start with the DefaultTransport for sane defaults.
  71. transport := http.DefaultTransport.(*http.Transport).Clone()
  72. // Conservatively disable HTTP keep-alives as this program will only
  73. // ever need a single HTTP request.
  74. transport.DisableKeepAlives = true
  75. // Timeout early if the server doesn't even return the headers.
  76. transport.ResponseHeaderTimeout = time.Minute
  77. go func() {
  78. defer trace.CatchPanic("crowdsec/ShowPrometheus")
  79. err := prom2json.FetchMetricFamilies(url, mfChan, transport)
  80. if err != nil {
  81. errChan <- fmt.Errorf("failed to fetch metrics: %w", err)
  82. return
  83. }
  84. errChan <- nil
  85. }()
  86. result := []*prom2json.Family{}
  87. for mf := range mfChan {
  88. result = append(result, prom2json.NewFamily(mf))
  89. }
  90. if err := <-errChan; err != nil {
  91. return err
  92. }
  93. log.Debugf("Finished reading metrics output, %d entries", len(result))
  94. /*walk*/
  95. mAcquis := ms["acquisition"].(statAcquis)
  96. mParser := ms["parsers"].(statParser)
  97. mBucket := ms["scenarios"].(statBucket)
  98. mLapi := ms["lapi"].(statLapi)
  99. mLapiMachine := ms["lapi-machine"].(statLapiMachine)
  100. mLapiBouncer := ms["lapi-bouncer"].(statLapiBouncer)
  101. mLapiDecision := ms["lapi-decisions"].(statLapiDecision)
  102. mDecision := ms["decisions"].(statDecision)
  103. mAppsecEngine := ms["appsec-engine"].(statAppsecEngine)
  104. mAppsecRule := ms["appsec-rule"].(statAppsecRule)
  105. mAlert := ms["alerts"].(statAlert)
  106. mStash := ms["stash"].(statStash)
  107. mWhitelist := ms["whitelists"].(statWhitelist)
  108. for idx, fam := range result {
  109. if !strings.HasPrefix(fam.Name, "cs_") {
  110. continue
  111. }
  112. log.Tracef("round %d", idx)
  113. for _, m := range fam.Metrics {
  114. metric, ok := m.(prom2json.Metric)
  115. if !ok {
  116. log.Debugf("failed to convert metric to prom2json.Metric")
  117. continue
  118. }
  119. name, ok := metric.Labels["name"]
  120. if !ok {
  121. log.Debugf("no name in Metric %v", metric.Labels)
  122. }
  123. source, ok := metric.Labels["source"]
  124. if !ok {
  125. log.Debugf("no source in Metric %v for %s", metric.Labels, fam.Name)
  126. } else {
  127. if srctype, ok := metric.Labels["type"]; ok {
  128. source = srctype + ":" + source
  129. }
  130. }
  131. value := m.(prom2json.Metric).Value
  132. machine := metric.Labels["machine"]
  133. bouncer := metric.Labels["bouncer"]
  134. route := metric.Labels["route"]
  135. method := metric.Labels["method"]
  136. reason := metric.Labels["reason"]
  137. origin := metric.Labels["origin"]
  138. action := metric.Labels["action"]
  139. appsecEngine := metric.Labels["appsec_engine"]
  140. appsecRule := metric.Labels["rule_name"]
  141. mtype := metric.Labels["type"]
  142. fval, err := strconv.ParseFloat(value, 32)
  143. if err != nil {
  144. log.Errorf("Unexpected int value %s : %s", value, err)
  145. }
  146. ival := int(fval)
  147. switch fam.Name {
  148. //
  149. // buckets
  150. //
  151. case "cs_bucket_created_total":
  152. mBucket.Process(name, "instantiation", ival)
  153. case "cs_buckets":
  154. mBucket.Process(name, "curr_count", ival)
  155. case "cs_bucket_overflowed_total":
  156. mBucket.Process(name, "overflow", ival)
  157. case "cs_bucket_poured_total":
  158. mBucket.Process(name, "pour", ival)
  159. mAcquis.Process(source, "pour", ival)
  160. case "cs_bucket_underflowed_total":
  161. mBucket.Process(name, "underflow", ival)
  162. //
  163. // parsers
  164. //
  165. case "cs_parser_hits_total":
  166. mAcquis.Process(source, "reads", ival)
  167. case "cs_parser_hits_ok_total":
  168. mAcquis.Process(source, "parsed", ival)
  169. case "cs_parser_hits_ko_total":
  170. mAcquis.Process(source, "unparsed", ival)
  171. case "cs_node_hits_total":
  172. mParser.Process(name, "hits", ival)
  173. case "cs_node_hits_ok_total":
  174. mParser.Process(name, "parsed", ival)
  175. case "cs_node_hits_ko_total":
  176. mParser.Process(name, "unparsed", ival)
  177. //
  178. // whitelists
  179. //
  180. case "cs_node_wl_hits_total":
  181. mWhitelist.Process(name, reason, "hits", ival)
  182. case "cs_node_wl_hits_ok_total":
  183. mWhitelist.Process(name, reason, "whitelisted", ival)
  184. // track as well whitelisted lines at acquis level
  185. mAcquis.Process(source, "whitelisted", ival)
  186. //
  187. // lapi
  188. //
  189. case "cs_lapi_route_requests_total":
  190. mLapi.Process(route, method, ival)
  191. case "cs_lapi_machine_requests_total":
  192. mLapiMachine.Process(machine, route, method, ival)
  193. case "cs_lapi_bouncer_requests_total":
  194. mLapiBouncer.Process(bouncer, route, method, ival)
  195. case "cs_lapi_decisions_ko_total", "cs_lapi_decisions_ok_total":
  196. mLapiDecision.Process(bouncer, fam.Name, ival)
  197. //
  198. // decisions
  199. //
  200. case "cs_active_decisions":
  201. mDecision.Process(reason, origin, action, ival)
  202. case "cs_alerts":
  203. mAlert.Process(reason, ival)
  204. //
  205. // stash
  206. //
  207. case "cs_cache_size":
  208. mStash.Process(name, mtype, ival)
  209. //
  210. // appsec
  211. //
  212. case "cs_appsec_reqs_total":
  213. mAppsecEngine.Process(appsecEngine, "processed", ival)
  214. case "cs_appsec_block_total":
  215. mAppsecEngine.Process(appsecEngine, "blocked", ival)
  216. case "cs_appsec_rule_hits":
  217. mAppsecRule.Process(appsecEngine, appsecRule, "triggered", ival)
  218. default:
  219. log.Debugf("unknown: %+v", fam.Name)
  220. continue
  221. }
  222. }
  223. }
  224. return nil
  225. }
  226. type cliMetrics struct {
  227. cfg configGetter
  228. }
  229. func NewCLIMetrics(cfg configGetter) *cliMetrics {
  230. return &cliMetrics{
  231. cfg: cfg,
  232. }
  233. }
  234. func (ms metricStore) Format(out io.Writer, sections []string, formatType string, noUnit bool) error {
  235. // copy only the sections we want
  236. want := map[string]metricSection{}
  237. // if explicitly asking for sections, we want to show empty tables
  238. showEmpty := len(sections) > 0
  239. // if no sections are specified, we want all of them
  240. if len(sections) == 0 {
  241. for section := range ms {
  242. sections = append(sections, section)
  243. }
  244. }
  245. for _, section := range sections {
  246. want[section] = ms[section]
  247. }
  248. switch formatType {
  249. case "human":
  250. for section := range want {
  251. want[section].Table(out, noUnit, showEmpty)
  252. }
  253. case "json":
  254. x, err := json.MarshalIndent(want, "", " ")
  255. if err != nil {
  256. return fmt.Errorf("failed to marshal metrics: %w", err)
  257. }
  258. out.Write(x)
  259. case "raw":
  260. x, err := yaml.Marshal(want)
  261. if err != nil {
  262. return fmt.Errorf("failed to marshal metrics: %w", err)
  263. }
  264. out.Write(x)
  265. default:
  266. return fmt.Errorf("unknown format type %s", formatType)
  267. }
  268. return nil
  269. }
  270. func (cli *cliMetrics) show(sections []string, url string, noUnit bool) error {
  271. cfg := cli.cfg()
  272. if url != "" {
  273. cfg.Cscli.PrometheusUrl = url
  274. }
  275. if cfg.Prometheus == nil {
  276. return ErrMissingConfig
  277. }
  278. if !cfg.Prometheus.Enabled {
  279. return ErrMetricsDisabled
  280. }
  281. ms := NewMetricStore()
  282. if err := ms.Fetch(cfg.Cscli.PrometheusUrl); err != nil {
  283. return err
  284. }
  285. // any section that we don't have in the store is an error
  286. for _, section := range sections {
  287. if _, ok := ms[section]; !ok {
  288. return fmt.Errorf("unknown metrics type: %s", section)
  289. }
  290. }
  291. if err := ms.Format(color.Output, sections, cfg.Cscli.Output, noUnit); err != nil {
  292. return err
  293. }
  294. return nil
  295. }
  296. func (cli *cliMetrics) NewCommand() *cobra.Command {
  297. var (
  298. url string
  299. noUnit bool
  300. )
  301. cmd := &cobra.Command{
  302. Use: "metrics",
  303. Short: "Display crowdsec prometheus metrics.",
  304. Long: `Fetch metrics from a Local API server and display them`,
  305. Example: `# Show all Metrics, skip empty tables (same as "cecli metrics show")
  306. cscli metrics
  307. # Show only some metrics, connect to a different url
  308. cscli metrics --url http://lapi.local:6060/metrics show acquisition parsers
  309. # List available metric types
  310. cscli metrics list`,
  311. Args: cobra.ExactArgs(0),
  312. DisableAutoGenTag: true,
  313. RunE: func(cmd *cobra.Command, args []string) error {
  314. return cli.show(nil, url, noUnit)
  315. },
  316. }
  317. flags := cmd.Flags()
  318. flags.StringVarP(&url, "url", "u", "", "Prometheus url (http://<ip>:<port>/metrics)")
  319. flags.BoolVar(&noUnit, "no-unit", false, "Show the real number instead of formatted with units")
  320. cmd.AddCommand(cli.newShowCmd())
  321. cmd.AddCommand(cli.newListCmd())
  322. return cmd
  323. }
  324. // expandAlias returns a list of sections. The input can be a list of sections or alias.
  325. func (cli *cliMetrics) expandSectionGroups(args []string) []string {
  326. ret := []string{}
  327. for _, section := range args {
  328. switch section {
  329. case "engine":
  330. ret = append(ret, "acquisition", "parsers", "scenarios", "stash", "whitelists")
  331. case "lapi":
  332. ret = append(ret, "alerts", "decisions", "lapi", "lapi-bouncer", "lapi-decisions", "lapi-machine")
  333. case "appsec":
  334. ret = append(ret, "appsec-engine", "appsec-rule")
  335. default:
  336. ret = append(ret, section)
  337. }
  338. }
  339. return ret
  340. }
  341. func (cli *cliMetrics) newShowCmd() *cobra.Command {
  342. var (
  343. url string
  344. noUnit bool
  345. )
  346. cmd := &cobra.Command{
  347. Use: "show [type]...",
  348. Short: "Display all or part of the available metrics.",
  349. Long: `Fetch metrics from a Local API server and display them, optionally filtering on specific types.`,
  350. Example: `# Show all Metrics, skip empty tables
  351. cscli metrics show
  352. # Use an alias: "engine", "lapi" or "appsec" to show a group of metrics
  353. cscli metrics show engine
  354. # Show some specific metrics, show empty tables, connect to a different url
  355. cscli metrics show acquisition parsers scenarios stash --url http://lapi.local:6060/metrics
  356. # To list available metric types, use "cscli metrics list"
  357. cscli metrics list; cscli metrics list -o json
  358. # Show metrics in json format
  359. cscli metrics show acquisition parsers scenarios stash -o json`,
  360. // Positional args are optional
  361. DisableAutoGenTag: true,
  362. RunE: func(_ *cobra.Command, args []string) error {
  363. args = cli.expandSectionGroups(args)
  364. return cli.show(args, url, noUnit)
  365. },
  366. }
  367. flags := cmd.Flags()
  368. flags.StringVarP(&url, "url", "u", "", "Metrics url (http://<ip>:<port>/metrics)")
  369. flags.BoolVar(&noUnit, "no-unit", false, "Show the real number instead of formatted with units")
  370. return cmd
  371. }
  372. func (cli *cliMetrics) list() error {
  373. type metricType struct {
  374. Type string `json:"type" yaml:"type"`
  375. Title string `json:"title" yaml:"title"`
  376. Description string `json:"description" yaml:"description"`
  377. }
  378. var allMetrics []metricType
  379. ms := NewMetricStore()
  380. for _, section := range maptools.SortedKeys(ms) {
  381. title, description := ms[section].Description()
  382. allMetrics = append(allMetrics, metricType{
  383. Type: section,
  384. Title: title,
  385. Description: description,
  386. })
  387. }
  388. switch cli.cfg().Cscli.Output {
  389. case "human":
  390. t := newTable(color.Output)
  391. t.SetRowLines(true)
  392. t.SetHeaders("Type", "Title", "Description")
  393. for _, metric := range allMetrics {
  394. t.AddRow(metric.Type, metric.Title, metric.Description)
  395. }
  396. t.Render()
  397. case "json":
  398. x, err := json.MarshalIndent(allMetrics, "", " ")
  399. if err != nil {
  400. return fmt.Errorf("failed to marshal metric types: %w", err)
  401. }
  402. fmt.Println(string(x))
  403. case "raw":
  404. x, err := yaml.Marshal(allMetrics)
  405. if err != nil {
  406. return fmt.Errorf("failed to marshal metric types: %w", err)
  407. }
  408. fmt.Println(string(x))
  409. }
  410. return nil
  411. }
  412. func (cli *cliMetrics) newListCmd() *cobra.Command {
  413. cmd := &cobra.Command{
  414. Use: "list",
  415. Short: "List available types of metrics.",
  416. Long: `List available types of metrics.`,
  417. Args: cobra.ExactArgs(0),
  418. DisableAutoGenTag: true,
  419. RunE: func(_ *cobra.Command, _ []string) error {
  420. return cli.list()
  421. },
  422. }
  423. return cmd
  424. }