Fix language stat calculation (#11692)
* Fix language stat calculation
* Group languages and ignore 0 size files
* remove unneeded code
This commit is contained in:
parent
ea4c139cd2
commit
9d652002c6
|
@ -26,22 +26,6 @@ type LanguageStat struct {
|
||||||
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
|
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// specialLanguages defines list of languages that are excluded from the calculation
|
|
||||||
// unless they are the only language present in repository. Only languages which under
|
|
||||||
// normal circumstances are not considered to be code should be listed here.
|
|
||||||
var specialLanguages = map[string]struct{}{
|
|
||||||
"XML": {},
|
|
||||||
"JSON": {},
|
|
||||||
"TOML": {},
|
|
||||||
"YAML": {},
|
|
||||||
"INI": {},
|
|
||||||
"SQL": {},
|
|
||||||
"SVG": {},
|
|
||||||
"Text": {},
|
|
||||||
"Markdown": {},
|
|
||||||
"other": {},
|
|
||||||
}
|
|
||||||
|
|
||||||
// LanguageStatList defines a list of language statistics
|
// LanguageStatList defines a list of language statistics
|
||||||
type LanguageStatList []*LanguageStat
|
type LanguageStatList []*LanguageStat
|
||||||
|
|
||||||
|
@ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
|
||||||
langPerc := make(map[string]float32)
|
langPerc := make(map[string]float32)
|
||||||
var otherPerc float32 = 100
|
var otherPerc float32 = 100
|
||||||
var total int64
|
var total int64
|
||||||
// Check that repository has at least one non-special language
|
|
||||||
var skipSpecial bool
|
|
||||||
for _, stat := range stats {
|
for _, stat := range stats {
|
||||||
if _, ok := specialLanguages[stat.Language]; !ok {
|
|
||||||
skipSpecial = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, stat := range stats {
|
|
||||||
// Exclude specific languages from percentage calculation
|
|
||||||
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
total += stat.Size
|
total += stat.Size
|
||||||
}
|
}
|
||||||
if total > 0 {
|
if total > 0 {
|
||||||
for _, stat := range stats {
|
for _, stat := range stats {
|
||||||
// Exclude specific languages from percentage calculation
|
|
||||||
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
|
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
|
||||||
if perc <= 0.1 {
|
if perc <= 0.1 {
|
||||||
continue
|
continue
|
||||||
|
@ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
|
||||||
langPerc[stat.Language] = perc
|
langPerc[stat.Language] = perc
|
||||||
}
|
}
|
||||||
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
|
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
|
||||||
} else {
|
|
||||||
otherPerc = 100
|
|
||||||
}
|
}
|
||||||
if otherPerc > 0 {
|
if otherPerc > 0 {
|
||||||
langPerc["other"] = otherPerc
|
langPerc["other"] = otherPerc
|
||||||
|
|
|
@ -19,6 +19,20 @@ import (
|
||||||
|
|
||||||
const fileSizeLimit int64 = 16 * 1024 * 1024
|
const fileSizeLimit int64 = 16 * 1024 * 1024
|
||||||
|
|
||||||
|
// specialLanguages defines list of languages that are excluded from the calculation
|
||||||
|
// unless they are the only language present in repository. Only languages which under
|
||||||
|
// normal circumstances are not considered to be code should be listed here.
|
||||||
|
var specialLanguages = []string{
|
||||||
|
"XML",
|
||||||
|
"JSON",
|
||||||
|
"TOML",
|
||||||
|
"YAML",
|
||||||
|
"INI",
|
||||||
|
"SVG",
|
||||||
|
"Text",
|
||||||
|
"Markdown",
|
||||||
|
}
|
||||||
|
|
||||||
// GetLanguageStats calculates language stats for git repository at specified commit
|
// GetLanguageStats calculates language stats for git repository at specified commit
|
||||||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
|
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
|
||||||
r, err := git.PlainOpen(repo.Path)
|
r, err := git.PlainOpen(repo.Path)
|
||||||
|
@ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||||
|
|
||||||
sizes := make(map[string]int64)
|
sizes := make(map[string]int64)
|
||||||
err = tree.Files().ForEach(func(f *object.File) error {
|
err = tree.Files().ForEach(func(f *object.File) error {
|
||||||
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
|
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
|
||||||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
|
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||||
|
|
||||||
language := analyze.GetCodeLanguage(f.Name, content)
|
language := analyze.GetCodeLanguage(f.Name, content)
|
||||||
if language == enry.OtherLanguage || language == "" {
|
if language == enry.OtherLanguage || language == "" {
|
||||||
language = "other"
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// group languages, such as Pug -> HTML; SCSS -> CSS
|
||||||
|
group := enry.GetLanguageGroup(language)
|
||||||
|
if group != "" {
|
||||||
|
language = group
|
||||||
}
|
}
|
||||||
|
|
||||||
sizes[language] += f.Size
|
sizes[language] += f.Size
|
||||||
|
@ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) == 0 {
|
// filter special languages unless they are the only language
|
||||||
sizes["other"] = 0
|
if len(sizes) > 1 {
|
||||||
|
for _, language := range specialLanguages {
|
||||||
|
delete(sizes, language)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return sizes, nil
|
return sizes, nil
|
||||||
|
|
|
@ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) {
|
||||||
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
|
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
|
||||||
langs, err := repo.GetTopLanguageStats(5)
|
langs, err := repo.GetTopLanguageStats(5)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Len(t, langs, 1)
|
assert.Empty(t, langs)
|
||||||
assert.Equal(t, "other", langs[0].Language)
|
|
||||||
assert.Equal(t, float32(100), langs[0].Percentage)
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue