From 3d2113ff0b80d952c2b10bdf20801f8fcab6057f Mon Sep 17 00:00:00 2001 From: Fabio Bonelli Date: Tue, 4 Jun 2024 16:14:25 +0200 Subject: [PATCH] feat: use publiccode-parser-go v4 --- crawler/crawler.go | 45 ++++++++++++++++++++++++++------------------- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/crawler/crawler.go b/crawler/crawler.go index 92c304a..a79c3bd 100644 --- a/crawler/crawler.go +++ b/crawler/crawler.go @@ -18,7 +18,7 @@ import ( "github.com/italia/publiccode-crawler/v4/git" "github.com/italia/publiccode-crawler/v4/metrics" "github.com/italia/publiccode-crawler/v4/scanner" - publiccode "github.com/italia/publiccode-parser-go/v3" + publiccode "github.com/italia/publiccode-parser-go/v4" log "github.com/sirupsen/logrus" "github.com/spf13/viper" "golang.org/x/exp/slices" @@ -327,15 +327,6 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx ), ) - var parser *publiccode.Parser - parser, err = publiccode.NewParser(repository.FileRawURL) - if err != nil { - logEntries = append(logEntries, fmt.Sprintf("[%s] BAD publiccode.yml: %s\n", repository.Name, err.Error())) - metrics.GetCounter("repository_bad_publiccodeyml", c.Index).Inc() - - return - } - //nolint:godox // FIXME: this is hardcoded for now, because it requires changes to publiccode-parser-go. domain := publiccode.Domain{ @@ -344,9 +335,22 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx BasicAuth: []string{os.Getenv("GITHUB_TOKEN")}, } + var parser *publiccode.Parser + parser, err = publiccode.NewParser(publiccode.ParserConfig{Domain: domain}) + if err != nil { + logEntries = append( + logEntries, + fmt.Sprintf("[%s] can't create a Parser: %s\n", repository.Name, err.Error()), + ) + + return + } + + var parsed publiccode.PublicCode + parsed, err = parser.Parse(repository.FileRawURL) + valid := true - err = parser.ParseInDomain(resp.Body, domain.Host, domain.UseTokenFor, domain.BasicAuth) if err != nil { var validationResults publiccode.ValidationResults if errors.As(err, &validationResults) { @@ -363,7 +367,8 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx publisherID := viper.GetString("MAIN_PUBLISHER_ID") if valid && repository.Publisher.ID != publisherID { - err = validateFile(repository.Publisher, *parser, repository.FileRawURL) + //nolint:forcetypeassert // we'd want to panic here anyway if the library returns a non v0 + err = validateFile(repository.Publisher, parsed.(publiccode.PublicCodeV0), repository.FileRawURL) if err != nil { valid = false } @@ -372,6 +377,8 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx if !valid { logEntries = append(logEntries, fmt.Sprintf("[%s] BAD publiccode.yml: %+v\n", repository.Name, err)) metrics.GetCounter("repository_bad_publiccodeyml", c.Index).Inc() + + return } else { logEntries = append(logEntries, fmt.Sprintf("[%s] GOOD publiccode.yml\n", repository.Name)) metrics.GetCounter("repository_good_publiccodeyml", c.Index).Inc() @@ -391,7 +398,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx aliases = append(aliases, repository.URL.String()) } - publiccodeYml, err := parser.ToYAML() + publiccodeYml, err := parsed.ToYAML() if err != nil { logEntries = append(logEntries, fmt.Sprintf("[%s] parsing error: %s", repository.Name, err.Error())) @@ -433,7 +440,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx if !viper.GetBool("SKIP_VITALITY") && !c.DryRun { // Clone repository. - err = git.CloneRepository(repository.URL.Host, repository.Name, parser.PublicCode.URL.String(), c.Index) + err = git.CloneRepository(repository.URL.Host, repository.Name, parsed.Url().String(), c.Index) if err != nil { logEntries = append(logEntries, fmt.Sprintf("[%s] error while cloning: %v\n", repository.Name, err)) } @@ -460,11 +467,11 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx // validateFile performs additional validations that are not strictly mandated // by the publiccode.yml Standard. // Using `one` command this check will be skipped. -func validateFile(publisher common.Publisher, parser publiccode.Parser, fileRawURL string) error { +func validateFile(publisher common.Publisher, parsed publiccode.PublicCodeV0, fileRawURL string) error { u, _ := url.Parse(fileRawURL) repo1 := vcsurl.GetRepo(u) - repo2 := vcsurl.GetRepo((*url.URL)(parser.PublicCode.URL)) + repo2 := vcsurl.GetRepo((*url.URL)(parsed.Url())) if repo1 != nil && repo2 != nil { // Let's ignore the schema when checking for equality. @@ -477,7 +484,7 @@ func validateFile(publisher common.Publisher, parser publiccode.Parser, fileRawU return fmt.Errorf( "declared url (%s) and actual publiccode.yml location URL (%s) "+ "are not in the same repo: '%s' vs '%s'", - parser.PublicCode.URL, fileRawURL, repo2, repo1, + parsed.Url(), fileRawURL, repo2, repo1, ) } } @@ -494,11 +501,11 @@ func validateFile(publisher common.Publisher, parser publiccode.Parser, fileRawU if !idIsUUID && !strings.EqualFold( strings.TrimSpace(publisher.ID), - strings.TrimSpace(parser.PublicCode.It.Riuso.CodiceIPA), + strings.TrimSpace(parsed.It.Riuso.CodiceIPA), ) { return fmt.Errorf( "codiceIPA is '%s', but '%s' was expected for '%s' in %s", - parser.PublicCode.It.Riuso.CodiceIPA, + parsed.It.Riuso.CodiceIPA, publisher.ID, publisher.Name, fileRawURL, diff --git a/go.mod b/go.mod index 6d05ab4..baaa748 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/google/go-github/v43 v43.0.0 github.com/hashicorp/go-retryablehttp v0.7.1 github.com/italia/httpclient-lib-go v0.0.2 - github.com/italia/publiccode-parser-go/v3 v3.1.3 + github.com/italia/publiccode-parser-go/v4 v4.0.0 github.com/ktrysmt/go-bitbucket v0.9.63 github.com/prometheus/client_golang v1.11.1 github.com/prometheus/client_model v0.3.0 diff --git a/go.sum b/go.sum index 5d4013a..fcf4ece 100644 --- a/go.sum +++ b/go.sum @@ -914,8 +914,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/italia/httpclient-lib-go v0.0.2 h1:4bJLywTVd7qHPdKxJXvvhlXp436JTC4KA6dLhIl5a6c= github.com/italia/httpclient-lib-go v0.0.2/go.mod h1:b0/D3ULsBw8X+zEl7j/kSZmiMlUdj+agppneOvSq6eA= -github.com/italia/publiccode-parser-go/v3 v3.1.3 h1:o4x0K6dJgBUh1Tac99AJEyM6uNV+e1GEJ7F5r+uMNMI= -github.com/italia/publiccode-parser-go/v3 v3.1.3/go.mod h1:TvC+rGxBbIE+riQyey4GFtyC7GgKKTGmtsHINIp4aQ8= +github.com/italia/publiccode-parser-go/v4 v4.0.0 h1:1/q4VH7WtnkaJyq3Cn0HU0KUz9X6uRPzmxNariKuI3w= +github.com/italia/publiccode-parser-go/v4 v4.0.0/go.mod h1:qmxP/BgLwCeNMjfcXbRz8WCMPn85Pekcy+oGTUmEF4U= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=