Переглянути джерело

Sync data files with hub index (#1153)

* Allow versioning, updating data files
Shivam Sandbhor 3 роки тому
батько
коміт
44d2eaad51

+ 1 - 1
cmd/crowdsec-cli/hub.go

@@ -56,7 +56,7 @@ cscli hub update # Download list of available configurations from the hub
 				log.Info(v)
 				log.Info(v)
 			}
 			}
 			cwhub.DisplaySummary()
 			cwhub.DisplaySummary()
-			ListItems([]string{cwhub.PARSERS, cwhub.COLLECTIONS, cwhub.SCENARIOS, cwhub.PARSERS_OVFLW}, args, true, false)
+			ListItems([]string{cwhub.PARSERS, cwhub.COLLECTIONS, cwhub.SCENARIOS, cwhub.PARSERS_OVFLW, cwhub.DATA_FILES}, args, true, false)
 		},
 		},
 	}
 	}
 	cmdHubList.PersistentFlags().BoolVarP(&all, "all", "a", false, "List disabled items as well")
 	cmdHubList.PersistentFlags().BoolVarP(&all, "all", "a", false, "List disabled items as well")

+ 10 - 11
cmd/crowdsec-cli/utils.go

@@ -250,22 +250,21 @@ func UpgradeConfig(itemType string, name string, force bool) {
 			continue
 			continue
 		}
 		}
 
 
+		found = true
+		if v.UpToDate || v.Tainted {
+			if v.UpToDate {
+				log.Infof("%s : up-to-date", v.Name)
+			}
+			if err = cwhub.DownloadDataIfNeeded(csConfig.Hub, v, force); err != nil {
+				log.Fatalf("%s : download failed : %v", v.Name, err)
+			}
+		}
+
 		if !v.Downloaded {
 		if !v.Downloaded {
 			log.Warningf("%s : not downloaded, please install.", v.Name)
 			log.Warningf("%s : not downloaded, please install.", v.Name)
 			continue
 			continue
 		}
 		}
 
 
-		found = true
-		if v.UpToDate {
-			log.Infof("%s : up-to-date", v.Name)
-
-			if !force {
-				if err = cwhub.DownloadDataIfNeeded(csConfig.Hub, v, false); err != nil {
-					log.Fatalf("%s : download failed : %v", v.Name, err)
-				}
-				continue
-			}
-		}
 		v, err = cwhub.DownloadLatest(csConfig.Hub, v, force, true)
 		v, err = cwhub.DownloadLatest(csConfig.Hub, v, force, true)
 		if err != nil {
 		if err != nil {
 			log.Fatalf("%s : download failed : %v", v.Name, err)
 			log.Fatalf("%s : download failed : %v", v.Name, err)

+ 7 - 7
pkg/cwhub/cwhub.go

@@ -22,11 +22,12 @@ var PARSERS = "parsers"
 var PARSERS_OVFLW = "postoverflows"
 var PARSERS_OVFLW = "postoverflows"
 var SCENARIOS = "scenarios"
 var SCENARIOS = "scenarios"
 var COLLECTIONS = "collections"
 var COLLECTIONS = "collections"
-var ItemTypes = []string{PARSERS, PARSERS_OVFLW, SCENARIOS, COLLECTIONS}
+var DATA_FILES = "data_files"
+var ItemTypes = []string{PARSERS, PARSERS_OVFLW, SCENARIOS, COLLECTIONS, DATA_FILES}
 
 
 var hubIdx map[string]map[string]Item
 var hubIdx map[string]map[string]Item
 
 
-var RawFileURLTemplate = "https://hub-cdn.crowdsec.net/%s/%s"
+var RawFileURLTemplate = "https://raw.githubusercontent.com/sbs2001/hub/%s/%s"
 var HubBranch = "master"
 var HubBranch = "master"
 var HubIndexFile = ".index.json"
 var HubIndexFile = ".index.json"
 
 
@@ -57,11 +58,10 @@ type Item struct {
 	BelongsToCollections []string `yaml:"belongs_to_collections,omitempty" json:"belongs_to_collections,omitempty"` /*if it's part of collections, track name here*/
 	BelongsToCollections []string `yaml:"belongs_to_collections,omitempty" json:"belongs_to_collections,omitempty"` /*if it's part of collections, track name here*/
 
 
 	/*remote (hub) infos*/
 	/*remote (hub) infos*/
-	RemoteURL  string                 `yaml:"remoteURL,omitempty" json:"remoteURL,omitempty"` //the full remote uri of file in http
-	RemotePath string                 `json:"path,omitempty" yaml:"remote_path,omitempty"`    //the path relative to git ie. /parsers/stage/author/file.yaml
-	RemoteHash string                 `yaml:"hash,omitempty" json:"hash,omitempty"`           //the meow
-	Version    string                 `json:"version,omitempty"`                              //the last version
-	Versions   map[string]ItemVersion `json:"versions,omitempty" yaml:"-"`                    //the list of existing versions
+	RemotePath string                 `json:"path,omitempty" yaml:"remote_path,omitempty"` //the path relative to git ie. /parsers/stage/author/file.yaml
+	RemoteHash string                 `yaml:"hash,omitempty" json:"hash,omitempty"`        //the meow
+	Version    string                 `json:"version,omitempty"`                           //the last version
+	Versions   map[string]ItemVersion `json:"versions,omitempty" yaml:"-"`                 //the list of existing versions
 
 
 	/*local (deployed) infos*/
 	/*local (deployed) infos*/
 	LocalPath string `yaml:"local_path,omitempty" json:"local_path,omitempty"` //the local path relative to ${CFG_DIR}
 	LocalPath string `yaml:"local_path,omitempty" json:"local_path,omitempty"` //the local path relative to ${CFG_DIR}

+ 7 - 6
pkg/cwhub/cwhub_test.go

@@ -383,8 +383,9 @@ func (t *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
 	responseBody := ""
 	responseBody := ""
 	log.Printf("---> %s", req.URL.Path)
 	log.Printf("---> %s", req.URL.Path)
 
 
+
 	/*FAKE PARSER*/
 	/*FAKE PARSER*/
-	if req.URL.Path == "/master/parsers/s01-parse/crowdsecurity/foobar_parser.yaml" {
+	if strings.HasSuffix(req.URL.Path, "/master/parsers/s01-parse/crowdsecurity/foobar_parser.yaml") {
 		responseBody = `onsuccess: next_stage
 		responseBody = `onsuccess: next_stage
 filter: evt.Parsed.program == 'foobar_parser'
 filter: evt.Parsed.program == 'foobar_parser'
 name: crowdsecurity/foobar_parser
 name: crowdsecurity/foobar_parser
@@ -395,7 +396,7 @@ grok:
   apply_on: message
   apply_on: message
 `
 `
 
 
-	} else if req.URL.Path == "/master/parsers/s01-parse/crowdsecurity/foobar_subparser.yaml" {
+	} else if strings.HasSuffix(req.URL.Path, "/master/parsers/s01-parse/crowdsecurity/foobar_subparser.yaml") {
 		responseBody = `onsuccess: next_stage
 		responseBody = `onsuccess: next_stage
 filter: evt.Parsed.program == 'foobar_parser'
 filter: evt.Parsed.program == 'foobar_parser'
 name: crowdsecurity/foobar_parser
 name: crowdsecurity/foobar_parser
@@ -407,19 +408,19 @@ grok:
 `
 `
 		/*FAKE SCENARIO*/
 		/*FAKE SCENARIO*/
 
 
-	} else if req.URL.Path == "/master/scenarios/crowdsecurity/foobar_scenario.yaml" {
+	} else if strings.HasSuffix(req.URL.Path, "/master/scenarios/crowdsecurity/foobar_scenario.yaml") {
 		responseBody = `filter: true
 		responseBody = `filter: true
 name: crowdsecurity/foobar_scenario`
 name: crowdsecurity/foobar_scenario`
 		/*FAKE COLLECTIONS*/
 		/*FAKE COLLECTIONS*/
-	} else if req.URL.Path == "/master/collections/crowdsecurity/foobar.yaml" {
+	} else if strings.HasSuffix(req.URL.Path, "/master/collections/crowdsecurity/foobar.yaml") {
 		responseBody = `
 		responseBody = `
 blah: blalala
 blah: blalala
 qwe: jejwejejw`
 qwe: jejwejejw`
-	} else if req.URL.Path == "/master/collections/crowdsecurity/foobar_subcollection.yaml" {
+	} else if strings.HasSuffix(req.URL.Path, "/master/collections/crowdsecurity/foobar_subcollection.yaml") {
 		responseBody = `
 		responseBody = `
 blah: blalala
 blah: blalala
 qwe: jejwejejw`
 qwe: jejwejejw`
-	} else if req.URL.Path == "/master/.index.json" {
+	} else if strings.HasSuffix(req.URL.Path, "/master/.index.json") {
 		responseBody =
 		responseBody =
 			`{
 			`{
 				"collections": {
 				"collections": {

+ 65 - 18
pkg/cwhub/download.go

@@ -3,15 +3,17 @@ package cwhub
 import (
 import (
 	"bytes"
 	"bytes"
 	"crypto/sha256"
 	"crypto/sha256"
+	"io"
 	"path"
 	"path"
 	"path/filepath"
 	"path/filepath"
+	"sort"
 
 
 	//"errors"
 	//"errors"
 	"github.com/pkg/errors"
 	"github.com/pkg/errors"
+	"gopkg.in/yaml.v2"
 
 
 	//"errors"
 	//"errors"
 	"fmt"
 	"fmt"
-	"io"
 	"io/ioutil"
 	"io/ioutil"
 	"net/http"
 	"net/http"
 	"os"
 	"os"
@@ -20,7 +22,6 @@ import (
 	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
 	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
 	"github.com/crowdsecurity/crowdsec/pkg/types"
 	"github.com/crowdsecurity/crowdsec/pkg/types"
 	log "github.com/sirupsen/logrus"
 	log "github.com/sirupsen/logrus"
-	"gopkg.in/yaml.v2"
 )
 )
 
 
 func UpdateHubIdx(hub *csconfig.Hub) error {
 func UpdateHubIdx(hub *csconfig.Hub) error {
@@ -79,7 +80,6 @@ func DownloadHubIdx(hub *csconfig.Hub) ([]byte, error) {
 func DownloadLatest(hub *csconfig.Hub, target Item, overwrite bool, updateOnly bool) (Item, error) {
 func DownloadLatest(hub *csconfig.Hub, target Item, overwrite bool, updateOnly bool) (Item, error) {
 	var err error
 	var err error
 
 
-	log.Debugf("Downloading %s %s", target.Type, target.Name)
 	if target.Type == COLLECTIONS {
 	if target.Type == COLLECTIONS {
 		var tmp = [][]string{target.Parsers, target.PostOverflows, target.Scenarios, target.Collections}
 		var tmp = [][]string{target.Parsers, target.PostOverflows, target.Scenarios, target.Collections}
 		for idx, ptr := range tmp {
 		for idx, ptr := range tmp {
@@ -143,10 +143,19 @@ func DownloadItem(hub *csconfig.Hub, target Item, overwrite bool) (Item, error)
 			return target, nil
 			return target, nil
 		}
 		}
 		if target.UpToDate {
 		if target.UpToDate {
-			log.Debugf("%s : up-to-date, not updated", target.Name)
 			//  We still have to check if data files are present
 			//  We still have to check if data files are present
+			log.Debugf("%s : up-to-date, not updated", target.Name)
+			data, err := os.ReadFile(target.LocalPath)
+			if err != nil {
+				return target, err
+			}
+			if err := downloadData(dataFolder, target.Author, overwrite, bytes.NewReader(data)); err != nil {
+				return target, errors.Wrapf(err, "while downloading data for %s", target.FileName)
+			}
+			return target, nil
 		}
 		}
 	}
 	}
+	log.Debugf("Downloading %s %s", target.Type, target.Name)
 	req, err := http.NewRequest("GET", fmt.Sprintf(RawFileURLTemplate, HubBranch, target.RemotePath), nil)
 	req, err := http.NewRequest("GET", fmt.Sprintf(RawFileURLTemplate, HubBranch, target.RemotePath), nil)
 	if err != nil {
 	if err != nil {
 		return target, errors.Wrap(err, fmt.Sprintf("while downloading %s", req.URL.String()))
 		return target, errors.Wrap(err, fmt.Sprintf("while downloading %s", req.URL.String()))
@@ -214,7 +223,7 @@ func DownloadItem(hub *csconfig.Hub, target Item, overwrite bool) (Item, error)
 	target.Tainted = false
 	target.Tainted = false
 	target.UpToDate = true
 	target.UpToDate = true
 
 
-	if err = downloadData(dataFolder, overwrite, bytes.NewReader(body)); err != nil {
+	if err = downloadData(dataFolder, target.Author, overwrite, bytes.NewReader(body)); err != nil {
 		return target, errors.Wrapf(err, "while downloading data for %s", target.FileName)
 		return target, errors.Wrapf(err, "while downloading data for %s", target.FileName)
 	}
 	}
 
 
@@ -232,16 +241,15 @@ func DownloadDataIfNeeded(hub *csconfig.Hub, target Item, force bool) error {
 	if itemFile, err = os.Open(itemFilePath); err != nil {
 	if itemFile, err = os.Open(itemFilePath); err != nil {
 		return errors.Wrapf(err, "while opening %s", itemFilePath)
 		return errors.Wrapf(err, "while opening %s", itemFilePath)
 	}
 	}
-	if err = downloadData(dataFolder, force, itemFile); err != nil {
+	if err = downloadData(dataFolder, target.Author, force, itemFile); err != nil {
 		return errors.Wrapf(err, "while downloading data for %s", itemFilePath)
 		return errors.Wrapf(err, "while downloading data for %s", itemFilePath)
 	}
 	}
 	return nil
 	return nil
 }
 }
 
 
-func downloadData(dataFolder string, force bool, reader io.Reader) error {
+func downloadData(dataFolder string, parentItemAuthor string, force bool, reader io.Reader) error {
 	var err error
 	var err error
 	dec := yaml.NewDecoder(reader)
 	dec := yaml.NewDecoder(reader)
-
 	for {
 	for {
 		data := &types.DataSet{}
 		data := &types.DataSet{}
 		err = dec.Decode(data)
 		err = dec.Decode(data)
@@ -252,20 +260,59 @@ func downloadData(dataFolder string, force bool, reader io.Reader) error {
 			break
 			break
 		}
 		}
 
 
-		download := false
-		if !force {
-			for _, dataS := range data.Data {
-				if _, err := os.Stat(path.Join(dataFolder, dataS.DestPath)); os.IsNotExist(err) {
-					download = true
+		for _, dataS := range data.Data {
+			download := false
+			dfPath := path.Join(dataFolder, dataS.DestPath)
+			dataFileName := strings.Split(dataS.DestPath, ".")[0]
+			_, downloadFromHub := hubIdx[DATA_FILES][dataFileName]
+			if downloadFromHub {
+				dataS.SourceURL = fmt.Sprintf(RawFileURLTemplate, HubBranch, hubIdx[DATA_FILES][dataFileName].RemotePath)
+			}
+
+			if _, err := os.Stat(dfPath); os.IsNotExist(err) {
+				download = true
+			} else if downloadFromHub {
+				sha, err := getSHA256(dfPath)
+				if err != nil {
+					return err
 				}
 				}
+				download = dataFileHasUpdates(sha, dataFileName)
 			}
 			}
-		}
-		if download || force {
-			err = types.GetData(data.Data, dataFolder)
-			if err != nil {
-				return errors.Wrap(err, "while getting data")
+
+			log.Infof("%v has updates=%v", dataFileName, download)
+
+			if download || force {
+				err = types.GetData(dataS, dataFolder)
+				if err != nil {
+					return errors.Wrap(err, "while getting data")
+				}
 			}
 			}
+
 		}
 		}
 	}
 	}
+
 	return nil
 	return nil
 }
 }
+
+// dataFileHasUpdates reports whether the hub index lists a newer version of the
+// data file dataFileName than the one whose SHA256 digest is fileSha.
+//
+// It returns false when fileSha is the digest of the newest version, when
+// fileSha matches no version listed in the index (the file is left alone rather
+// than overwritten), and when dataFileName is absent from the index. It should
+// only be called for data files that are available in the hub.
+func dataFileHasUpdates(fileSha string, dataFileName string) bool {
+	dataItem := hubIdx[DATA_FILES][dataFileName]
+	versions := make([]string, 0, len(dataItem.Versions))
+	for k := range dataItem.Versions {
+		versions = append(versions, k)
+	}
+	// Newest first. A plain string sort would place "0.9" above "0.10", so the
+	// versions are compared component by component as numbers instead.
+	sort.Slice(versions, func(i, j int) bool {
+		if c := compareDataFileVersions(versions[i], versions[j]); c != 0 {
+			return c > 0
+		}
+		// Numerically equal spellings ("0.1" vs "0.01"): keep the order deterministic.
+		return versions[i] > versions[j]
+	})
+	for i, version := range versions {
+		if fileSha != dataItem.Versions[version].Digest {
+			continue
+		}
+		log.Debugf("data file %s matched sha with version %s", dataFileName, version)
+		if i == 0 {
+			// The local file already is the newest known version.
+			return false
+		}
+		log.Debugf("data file %s is outdated, updating to version %s", dataFileName, versions[0])
+		return true
+	}
+	return false
+}
+
+// compareDataFileVersions compares two dot-separated versions component by
+// component and returns -1, 0 or 1 when a is respectively lower than, equal to
+// or greater than b. Components are expected to be decimal numbers; a missing
+// component counts as zero, so "0.1" and "0.1.0" compare equal.
+func compareDataFileVersions(a string, b string) int {
+	as := strings.Split(a, ".")
+	bs := strings.Split(b, ".")
+	for i := 0; i < len(as) || i < len(bs); i++ {
+		var x, y string
+		if i < len(as) {
+			x = strings.TrimLeft(as[i], "0")
+		}
+		if i < len(bs) {
+			y = strings.TrimLeft(bs[i], "0")
+		}
+		// Once leading zeros are stripped, a longer digit string is a larger
+		// number, and digit strings of equal length order like their values.
+		if len(x) != len(y) {
+			if len(x) < len(y) {
+				return -1
+			}
+			return 1
+		}
+		if x != y {
+			if x < y {
+				return -1
+			}
+			return 1
+		}
+	}
+	return 0
+}

+ 26 - 0
pkg/cwhub/download_test.go

@@ -40,3 +40,29 @@ func TestDownloadHubIdx(t *testing.T) {
 	RawFileURLTemplate = back
 	RawFileURLTemplate = back
 	fmt.Printf("->%+v", ret)
 	fmt.Printf("->%+v", ret)
 }
 }
+
+// TestDataFileIsLatest checks dataFileHasUpdates against a two-version index:
+// an older digest must report an update, while the newest digest and a digest
+// unknown to the index must not.
+func TestDataFileIsLatest(t *testing.T) {
+	// hubIdx is package-level state: put it back so other tests are unaffected.
+	savedIdx := hubIdx
+	defer func() { hubIdx = savedIdx }()
+
+	dataFileName := "crowdsecurity/sensitive-files"
+	hubIdx = map[string]map[string]Item{
+		DATA_FILES: {
+			dataFileName: {
+				Versions: map[string]ItemVersion{
+					"0.1": {Digest: "1"},
+					"0.2": {Digest: "2"},
+				},
+			},
+		},
+	}
+
+	tests := []struct {
+		sha  string
+		want bool
+	}{
+		{sha: "1", want: true},  // digest of 0.1, while 0.2 is available
+		{sha: "2", want: false}, // digest of 0.2, the newest version
+		{sha: "3", want: false}, // data file is tainted: digest matches no version
+	}
+	for _, tc := range tests {
+		if got := dataFileHasUpdates(tc.sha, dataFileName); got != tc.want {
+			t.Errorf("dataFileHasUpdates(%q, %s) = %v, expected %v", tc.sha, dataFileName, got, tc.want)
+		}
+	}
+}

+ 70 - 39
pkg/cwhub/loader.go

@@ -21,10 +21,10 @@ import (
 )
 )
 
 
 /*the walk/parser_visit function can't receive extra args*/
 /*the walk/parser_visit function can't receive extra args*/
-var hubdir, installdir string
+var hubdir, installdir, datadir string
 
 
+// TODO: Break this function into smaller functions.
 func parser_visit(path string, f os.FileInfo, err error) error {
 func parser_visit(path string, f os.FileInfo, err error) error {
-
 	var target Item
 	var target Item
 	var local bool
 	var local bool
 	var hubpath string
 	var hubpath string
@@ -42,16 +42,17 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 	if f == nil || f.IsDir() {
 	if f == nil || f.IsDir() {
 		return nil
 		return nil
 	}
 	}
-	//we only care about yaml files
-	if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") {
+	// yamls -> collections, parsers, overflows etc. txt, mmdb -> data files
+	if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") && !strings.HasSuffix(f.Name(), ".txt") && !strings.HasSuffix(f.Name(), ".mmdb") {
 		return nil
 		return nil
 	}
 	}
 
 
 	subs := strings.Split(path, "/")
 	subs := strings.Split(path, "/")
 
 
-	log.Tracef("path:%s, hubdir:%s, installdir:%s", path, hubdir, installdir)
+	log.Tracef("path:%s, hubdir:%s, installdir:%s datadir%s", path, hubdir, installdir, datadir)
 	/*we're in hub (~/.hub/hub/)*/
 	/*we're in hub (~/.hub/hub/)*/
-	if strings.HasPrefix(path, hubdir) {
+
+	hubDirSetter := func() {
 		log.Tracef("in hub dir")
 		log.Tracef("in hub dir")
 		inhub = true
 		inhub = true
 		//.../hub/parsers/s00-raw/crowdsec/skip-pretag.yaml
 		//.../hub/parsers/s00-raw/crowdsec/skip-pretag.yaml
@@ -64,7 +65,18 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 		fauthor = subs[len(subs)-2]
 		fauthor = subs[len(subs)-2]
 		stage = subs[len(subs)-3]
 		stage = subs[len(subs)-3]
 		ftype = subs[len(subs)-4]
 		ftype = subs[len(subs)-4]
-	} else if strings.HasPrefix(path, installdir) { /*we're in install /etc/crowdsec/<type>/... */
+	}
+
+	dataDirSetter := func() {
+		log.Tracef("in data dir")
+		fauthor = ""
+		fname = subs[len(subs)-1]
+		stage = ""
+		ftype = DATA_FILES
+		fauthor = ""
+	}
+
+	installDirSetter := func() {
 		log.Tracef("in install dir")
 		log.Tracef("in install dir")
 		if len(subs) < 3 {
 		if len(subs) < 3 {
 			log.Fatalf("path is too short : %s (%d)", path, len(subs))
 			log.Fatalf("path is too short : %s (%d)", path, len(subs))
@@ -76,23 +88,42 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 		fname = subs[len(subs)-1]
 		fname = subs[len(subs)-1]
 		stage = subs[len(subs)-2]
 		stage = subs[len(subs)-2]
 		ftype = subs[len(subs)-3]
 		ftype = subs[len(subs)-3]
-		fauthor = ""
-	} else {
-		return fmt.Errorf("File '%s' is not from hub '%s' nor from the configuration directory '%s'", path, hubdir, installdir)
 	}
 	}
 
 
+	setterByPath := map[string]func(){
+		installdir: installDirSetter,
+		hubdir:     hubDirSetter,
+		datadir:    dataDirSetter,
+	}
+
+	paths := []string{installdir, hubdir, datadir}
+	sort.Slice(paths, func(i, j int) bool {
+		return len(paths[i]) > len(paths[j])
+	})
+	foundMatch := false
+	for _, p := range paths {
+		if strings.HasPrefix(path, p) {
+			setterByPath[p]()
+			foundMatch = true
+			break
+		}
+	}
+	if !foundMatch {
+		return fmt.Errorf("file '%s' is not from hub '%s' nor from the configuration directory '%s'", path, hubdir, installdir)
+	}
 	log.Tracef("stage:%s ftype:%s", stage, ftype)
 	log.Tracef("stage:%s ftype:%s", stage, ftype)
 	//log.Printf("%s -> name:%s stage:%s", path, fname, stage)
 	//log.Printf("%s -> name:%s stage:%s", path, fname, stage)
+
+	// correct the stage and type for non-stage stuff.
 	if stage == SCENARIOS {
 	if stage == SCENARIOS {
 		ftype = SCENARIOS
 		ftype = SCENARIOS
 		stage = ""
 		stage = ""
 	} else if stage == COLLECTIONS {
 	} else if stage == COLLECTIONS {
 		ftype = COLLECTIONS
 		ftype = COLLECTIONS
 		stage = ""
 		stage = ""
-	} else if ftype != PARSERS && ftype != PARSERS_OVFLW /*its a PARSER / PARSER_OVFLW with a stage */ {
+	} else if ftype != PARSERS && ftype != PARSERS_OVFLW && ftype != DATA_FILES { /*its a PARSER / PARSER_OVFLW with a stage */
 		return fmt.Errorf("unknown configuration type for file '%s'", path)
 		return fmt.Errorf("unknown configuration type for file '%s'", path)
 	}
 	}
-
 	log.Tracef("CORRECTED [%s] by [%s] in stage [%s] of type [%s]", fname, fauthor, stage, ftype)
 	log.Tracef("CORRECTED [%s] by [%s] in stage [%s] of type [%s]", fname, fauthor, stage, ftype)
 
 
 	/*
 	/*
@@ -100,7 +131,7 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 		/etc/crowdsec/.../collections/linux.yaml -> ~/.hub/hub/collections/.../linux.yaml
 		/etc/crowdsec/.../collections/linux.yaml -> ~/.hub/hub/collections/.../linux.yaml
 		when the collection is installed, both files are created
 		when the collection is installed, both files are created
 	*/
 	*/
-	//non symlinks are local user files or hub files
+	//non symlinks are local user files or hub files or data files
 	if f.Mode()&os.ModeSymlink == 0 {
 	if f.Mode()&os.ModeSymlink == 0 {
 		local = true
 		local = true
 		log.Tracef("%s isn't a symlink", path)
 		log.Tracef("%s isn't a symlink", path)
@@ -122,8 +153,8 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 		log.Tracef("%s points to %s", path, hubpath)
 		log.Tracef("%s points to %s", path, hubpath)
 	}
 	}
 
 
-	//if it's not a symlink and not in hub, it's a local file, don't bother
-	if local && !inhub {
+	//if it's not a symlink and not in hub nor it is a data file. Don't bother checking this with index
+	if local && !inhub && ftype != DATA_FILES {
 		log.Tracef("%s is a local file, skip", path)
 		log.Tracef("%s is a local file, skip", path)
 		skippedLocal++
 		skippedLocal++
 		//	log.Printf("local scenario, skip.")
 		//	log.Printf("local scenario, skip.")
@@ -144,7 +175,7 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 	log.Tracef("check [%s] of %s", fname, ftype)
 	log.Tracef("check [%s] of %s", fname, ftype)
 
 
 	match := false
 	match := false
-	for k, v := range hubIdx[ftype] {
+	for k, v := range hubIdx[ftype] { // eg ftype = "collections", k = crowdsecurity/nginx, v is an Item struct
 		log.Tracef("check [%s] vs [%s] : %s", fname, v.RemotePath, ftype+"/"+stage+"/"+fname+".yaml")
 		log.Tracef("check [%s] vs [%s] : %s", fname, v.RemotePath, ftype+"/"+stage+"/"+fname+".yaml")
 		if fname != v.FileName {
 		if fname != v.FileName {
 			log.Tracef("%s != %s (filename)", fname, v.FileName)
 			log.Tracef("%s != %s (filename)", fname, v.FileName)
@@ -152,6 +183,7 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 		}
 		}
 		//wrong stage
 		//wrong stage
 		if v.Stage != stage {
 		if v.Stage != stage {
+			log.Tracef("%s != %s (stage)", v.Stage, stage)
 			continue
 			continue
 		}
 		}
 		/*if we are walking hub dir, just mark present files as downloaded*/
 		/*if we are walking hub dir, just mark present files as downloaded*/
@@ -168,14 +200,6 @@ func parser_visit(path string, f os.FileInfo, err error) error {
 				log.Tracef("marking %s as downloaded", v.Name)
 				log.Tracef("marking %s as downloaded", v.Name)
 				v.Downloaded = true
 				v.Downloaded = true
 			}
 			}
-		} else {
-			//wrong file
-			//<type>/<stage>/<author>/<name>.yaml
-			if !strings.HasSuffix(hubpath, v.RemotePath) {
-				//log.Printf("wrong file %s %s", hubpath, spew.Sdump(v))
-
-				continue
-			}
 		}
 		}
 		sha, err := getSHA256(path)
 		sha, err := getSHA256(path)
 		if err != nil {
 		if err != nil {
@@ -305,11 +329,18 @@ func CollecDepsCheck(v *Item) error {
 func SyncDir(hub *csconfig.Hub, dir string) (error, []string) {
 func SyncDir(hub *csconfig.Hub, dir string) (error, []string) {
 	hubdir = hub.HubDir
 	hubdir = hub.HubDir
 	installdir = hub.ConfigDir
 	installdir = hub.ConfigDir
+	datadir = hub.DataDir
 	warnings := []string{}
 	warnings := []string{}
 
 
-	/*For each, scan PARSERS, PARSERS_OVFLW, SCENARIOS and COLLECTIONS last*/
+	/*For each, scan PARSERS, PARSERS_OVFLW, DATA_FILES, SCENARIOS and COLLECTIONS last*/
 	for _, scan := range ItemTypes {
 	for _, scan := range ItemTypes {
-		cpath, err := filepath.Abs(fmt.Sprintf("%s/%s", dir, scan))
+		var cpath string
+		var err error
+		if scan == DATA_FILES {
+			cpath, err = filepath.Abs(hub.DataDir)
+		} else {
+			cpath, err = filepath.Abs(fmt.Sprintf("%s/%s", dir, scan))
+		}
 		if err != nil {
 		if err != nil {
 			log.Errorf("failed %s : %s", cpath, err)
 			log.Errorf("failed %s : %s", cpath, err)
 		}
 		}
@@ -317,24 +348,24 @@ func SyncDir(hub *csconfig.Hub, dir string) (error, []string) {
 		if err != nil {
 		if err != nil {
 			return err, warnings
 			return err, warnings
 		}
 		}
-
 	}
 	}
 
 
 	for k, v := range hubIdx[COLLECTIONS] {
 	for k, v := range hubIdx[COLLECTIONS] {
-		if v.Installed {
-			versStat := GetVersionStatus(&v)
-			if versStat == 0 { //latest
-				if err := CollecDepsCheck(&v); err != nil {
-					warnings = append(warnings, fmt.Sprintf("dependency of %s : %s", v.Name, err))
-					hubIdx[COLLECTIONS][k] = v
-				}
-			} else if versStat == 1 { //not up-to-date
-				warnings = append(warnings, fmt.Sprintf("update for collection %s available (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
-			} else { //version is higher than the highest available from hub?
-				warnings = append(warnings, fmt.Sprintf("collection %s is in the future (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
+		if !v.Installed {
+			continue
+		}
+		versStat := GetVersionStatus(&v)
+		if versStat == 0 { //latest
+			if err := CollecDepsCheck(&v); err != nil {
+				warnings = append(warnings, fmt.Sprintf("dependency of %s : %s", v.Name, err))
+				hubIdx[COLLECTIONS][k] = v
 			}
 			}
-			log.Debugf("installed (%s) - status:%d | installed:%s | latest : %s | full : %+v", v.Name, semver.Compare("v"+v.Version, "v"+v.LocalVersion), v.LocalVersion, v.Version, v.Versions)
+		} else if versStat == 1 { //not up-to-date
+			warnings = append(warnings, fmt.Sprintf("update for collection %s available (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
+		} else { //version is higher than the highest available from hub?
+			warnings = append(warnings, fmt.Sprintf("collection %s is in the future (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
 		}
 		}
+		log.Debugf("installed (%s) - status:%d | installed:%s | latest : %s | full : %+v", v.Name, semver.Compare("v"+v.Version, "v"+v.LocalVersion), v.LocalVersion, v.Version, v.Versions)
 	}
 	}
 	return nil, warnings
 	return nil, warnings
 }
 }

+ 11 - 8
pkg/types/dataset.go

@@ -6,6 +6,7 @@ import (
 	"net/http"
 	"net/http"
 	"os"
 	"os"
 	"path"
 	"path"
+	"path/filepath"
 
 
 	log "github.com/sirupsen/logrus"
 	log "github.com/sirupsen/logrus"
 )
 )
@@ -42,6 +43,10 @@ func downloadFile(url string, destPath string) error {
 		return fmt.Errorf("download response 'HTTP %d' : %s", resp.StatusCode, string(body))
 		return fmt.Errorf("download response 'HTTP %d' : %s", resp.StatusCode, string(body))
 	}
 	}
 
 
+	if err := os.MkdirAll(filepath.Dir(destPath), 0666); err != nil {
+		return err
+	}
+
 	file, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
 	file, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
@@ -60,14 +65,12 @@ func downloadFile(url string, destPath string) error {
 	return nil
 	return nil
 }
 }
 
 
-func GetData(data []*DataSource, dataDir string) error {
-	for _, dataS := range data {
-		destPath := path.Join(dataDir, dataS.DestPath)
-		log.Infof("downloading data '%s' in '%s'", dataS.SourceURL, destPath)
-		err := downloadFile(dataS.SourceURL, destPath)
-		if err != nil {
-			return err
-		}
+func GetData(dataS *DataSource, dataDir string) error {
+	destPath := path.Join(dataDir, dataS.DestPath)
+	log.Infof("downloading data '%s' in '%s'", dataS.SourceURL, destPath)
+	err := downloadFile(dataS.SourceURL, destPath)
+	if err != nil {
+		return err
 	}
 	}
 
 
 	return nil
 	return nil