Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion modules/git/repo_attribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type CheckAttributeOpts struct {
AllAttributes bool
Attributes []string
Filenames []string
IndexFile string
}

// CheckAttribute returns the git attributes of the given files, keyed by filename and then by attribute name
Expand Down Expand Up @@ -54,7 +55,12 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[

cmd := NewCommand(cmdArgs...)

if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
env := make([]string, 0, 1)
if len(opts.IndexFile) > 0 {
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
}

if err := cmd.RunInDirTimeoutEnvFullPipeline(env, -1, repo.Path, stdOut, stdErr, nil); err != nil {
return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}

Expand Down
15 changes: 12 additions & 3 deletions modules/git/repo_language_stats_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
)

// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
func (repo *Repository) GetLanguageStats(commitID string, preCheck func(path string) (string, bool)) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
if err != nil {
return nil, err
Expand Down Expand Up @@ -57,9 +57,18 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil
}

// TODO: Use .gitattributes file for linguist overrides
language := ""
skip := false
if preCheck != nil {
language, skip = preCheck(f.Name)
if skip {
return nil
}
}

language := analyze.GetCodeLanguage(f.Name, content)
if len(language) == 0 {
language = analyze.GetCodeLanguage(f.Name, content)
}
if language == enry.OtherLanguage || language == "" {
return nil
}
Expand Down
22 changes: 17 additions & 5 deletions modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
)

// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
func (repo *Repository) GetLanguageStats(commitID string, preCheck func(path string) (string, bool)) (map[string]int64, error) {
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
// so let's create a batch stdin and stdout

Expand Down Expand Up @@ -128,10 +128,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
continue
}

// TODO: Use .gitattributes file for linguist overrides
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - e.g. do all the detection tests using the filename first, before reading the content.
language := analyze.GetCodeLanguage(f.Name(), content)
// Use .gitattributes file for linguist overrides
language := ""
skip := false
if preCheck != nil {
language, skip = preCheck(f.Name())
if skip {
continue
}
}

if len(language) == 0 {
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - e.g. do all the detection tests using the filename first, before reading the content.
language = analyze.GetCodeLanguage(f.Name(), content)
}

if language == enry.OtherLanguage || language == "" {
continue
}
Expand Down
60 changes: 59 additions & 1 deletion modules/indexer/stats/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
package stats

import (
"io/ioutil"
"os"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
)

// DBIndexer implements Indexer interface to use database's like search
Expand Down Expand Up @@ -47,8 +51,62 @@ func (db *DBIndexer) Index(id int64) error {
return nil
}

var tmpIndex *os.File
if git.CheckGitVersionAtLeast("1.7.8") == nil {
tmpIndex, err = ioutil.TempFile("", "index")
if err != nil {
return err
}
defer func() {
err := util.Remove(tmpIndex.Name())
if err != nil {
log.Error("failed to remove tmp index file: %v", err)
}
}()

_, err = git.NewCommand("read-tree", commitID).
RunInDirWithEnv(gitRepo.Path, []string{"GIT_INDEX_FILE=" + tmpIndex.Name()})
if err != nil {
return err
}
}

// Calculate and save language statistics to database
stats, err := gitRepo.GetLanguageStats(commitID)
stats, err := gitRepo.GetLanguageStats(commitID, func(path string) (string, bool) {
// Determine the language by following the linguist override rules:
// the linguist-language=<lang> attribute assigns a language to a path, and
// the linguist-vendored attribute marks paths as vendored or un-vendored.

if tmpIndex == nil {
return "", false
}

name2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, now I understand what this is doing. I think we're going to need a pipe that writes each path to a single open

git check-attr --stdin ...

process, and then reads and parses the output it produces for each path.

Otherwise you're going to get n calls to git check-attr per repository.

Attributes: []string{"linguist-vendored", "linguist-language"},
Filenames: []string{path},
CachedOnly: true,
IndexFile: tmpIndex.Name(),
})
if err != nil {
log.Error("gitRepo.CheckAttribute: %v", err)
return "", false
}

attribute2info, has := name2attribute2info[path]
if !has {
return "", false
}
if attribute2info["linguist-vendored"] == "set" {
return "", true
}

lang := attribute2info["linguist-language"]
if lang == "unspecified" {
lang = ""
}

return lang, false
})
if err != nil {
log.Error("Unable to get language stats for ID %s for defaultbranch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.RepoPath(), err)
return err
Expand Down