From 46d19c4676efe5201c5de790bcb963bfc93a95c7 Mon Sep 17 00:00:00 2001
From: Alexey Terentyev <axifnx@gmail.com>
Date: Thu, 21 Jun 2018 12:09:46 +0300
Subject: [PATCH] Fix topics addition (Another solution) (#4031) (#4258)

* Added topics validation, fixed repo topics duplication (#4031)

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Added tests

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Fixed fmt

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Added comments to exported functions

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Deleted RemoveDuplicateTopics function

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Fixed messages

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Added migration

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* fmt migration file

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* fixed lint

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Added Copyright

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Added query solution for duplicates

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Fixed migration query

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Changed RegExp. Fixed migration

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* fmt migration file

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Fixed test for changed regexp

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Removed validation log messages

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Renamed migration file

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>

* Renamed validate function

Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
---
 models/migrations/migrations.go |   2 +
 models/migrations/v68.go        | 160 ++++++++++++++++++++++++++++++++
 models/topic.go                 |  15 +++
 models/topic_test.go            |  13 +++
 options/locale/locale_en-US.ini |   2 +
 public/js/index.js              |   4 +-
 routers/repo/topic.go           |  35 ++++++-
 routers/routes/routes.go        |   2 +-
 8 files changed, 229 insertions(+), 4 deletions(-)
 create mode 100644 models/migrations/v68.go

diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index 2537e5712..7732e1709 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -188,6 +188,8 @@ var migrations = []Migration{
 	NewMigration("add login source id column for public_key table", addLoginSourceIDToPublicKeyTable),
 	// v67 -> v68
 	NewMigration("remove stale watches", removeStaleWatches),
+	// v68 -> V69
+	NewMigration("Reformat and remove incorrect topics", reformatAndRemoveIncorrectTopics),
 }
 
 // Migrate database to current version
diff --git a/models/migrations/v68.go b/models/migrations/v68.go
new file mode 100644
index 000000000..d6a0d04c5
--- /dev/null
+++ b/models/migrations/v68.go
@@ -0,0 +1,160 @@
+// Copyright 2018 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package migrations
+
+import (
+	"strings"
+
+	"code.gitea.io/gitea/models"
+	"code.gitea.io/gitea/modules/log"
+
+	"github.com/go-xorm/xorm"
+)
+
+func reformatAndRemoveIncorrectTopics(x *xorm.Engine) (err error) {
+	log.Info("This migration could take up to minutes, please be patient.")
+	type Topic struct {
+		ID   int64
+		Name string `xorm:"unique"`
+	}
+
+	sess := x.NewSession()
+	defer sess.Close()
+
+	const batchSize = 100
+	touchedRepo := make(map[int64]struct{})
+	topics := make([]*Topic, 0, batchSize)
+	delTopicIDs := make([]int64, 0, batchSize)
+	ids := make([]int64, 0, 30)
+
+	if err := sess.Begin(); err != nil {
+		return err
+	}
+	log.Info("Validating existed topics...")
+	for start := 0; ; start += batchSize {
+		topics = topics[:0]
+		if err := sess.Asc("id").Limit(batchSize, start).Find(&topics); err != nil {
+			return err
+		}
+		if len(topics) == 0 {
+			break
+		}
+		for _, topic := range topics {
+			if models.ValidateTopic(topic.Name) {
+				continue
+			}
+			topic.Name = strings.Replace(strings.TrimSpace(strings.ToLower(topic.Name)), " ", "-", -1)
+
+			if err := sess.Table("repo_topic").Cols("repo_id").
+				Where("topic_id = ?", topic.ID).Find(&ids); err != nil {
+				return err
+			}
+			for _, id := range ids {
+				touchedRepo[id] = struct{}{}
+			}
+
+			if models.ValidateTopic(topic.Name) {
+				log.Info("Updating topic: id = %v, name = %v", topic.ID, topic.Name)
+				if _, err := sess.Table("topic").ID(topic.ID).
+					Update(&Topic{Name: topic.Name}); err != nil {
+					return err
+				}
+			} else {
+				delTopicIDs = append(delTopicIDs, topic.ID)
+			}
+		}
+	}
+
+	log.Info("Deleting incorrect topics...")
+	for start := 0; ; start += batchSize {
+		if (start + batchSize) < len(delTopicIDs) {
+			ids = delTopicIDs[start:(start + batchSize)]
+		} else {
+			ids = delTopicIDs[start:]
+		}
+
+		log.Info("Deleting 'repo_topic' rows for topics with ids = %v", ids)
+		if _, err := sess.In("topic_id", ids).Delete(&models.RepoTopic{}); err != nil {
+			return err
+		}
+
+		log.Info("Deleting topics with id = %v", ids)
+		if _, err := sess.In("id", ids).Delete(&Topic{}); err != nil {
+			return err
+		}
+
+		if len(ids) < batchSize {
+			break
+		}
+	}
+
+	repoTopics := make([]*models.RepoTopic, 0, batchSize)
+	delRepoTopics := make([]*models.RepoTopic, 0, batchSize)
+	tmpRepoTopics := make([]*models.RepoTopic, 0, 30)
+
+	log.Info("Checking the number of topics in the repositories...")
+	for start := 0; ; start += batchSize {
+		repoTopics = repoTopics[:0]
+		if err := sess.Cols("repo_id").Asc("repo_id").Limit(batchSize, start).
+			GroupBy("repo_id").Having("COUNT(*) > 25").Find(&repoTopics); err != nil {
+			return err
+		}
+		if len(repoTopics) == 0 {
+			break
+		}
+
+		log.Info("Number of repositories with more than 25 topics: %v", len(repoTopics))
+		for _, repoTopic := range repoTopics {
+			touchedRepo[repoTopic.RepoID] = struct{}{}
+
+			tmpRepoTopics = tmpRepoTopics[:0]
+			if err := sess.Where("repo_id = ?", repoTopic.RepoID).Find(&tmpRepoTopics); err != nil {
+				return err
+			}
+
+			log.Info("Repository with id = %v has %v topics", repoTopic.RepoID, len(tmpRepoTopics))
+
+			for i := len(tmpRepoTopics) - 1; i > 24; i-- {
+				delRepoTopics = append(delRepoTopics, tmpRepoTopics[i])
+			}
+		}
+	}
+
+	log.Info("Deleting superfluous topics for repositories (more than 25 topics)...")
+	for _, repoTopic := range delRepoTopics {
+		log.Info("Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v",
+			repoTopic.RepoID, repoTopic.TopicID)
+
+		if _, err := sess.Where("repo_id = ? AND topic_id = ?", repoTopic.RepoID,
+			repoTopic.TopicID).Delete(&models.RepoTopic{}); err != nil {
+			return err
+		}
+		if _, err := sess.Exec(
+			"UPDATE topic SET repo_count = (SELECT repo_count FROM topic WHERE id = ?) - 1 WHERE id = ?",
+			repoTopic.TopicID, repoTopic.TopicID); err != nil {
+			return err
+		}
+	}
+
+	topicNames := make([]string, 0, 30)
+	log.Info("Updating repositories 'topics' fields...")
+	for repoID := range touchedRepo {
+		if err := sess.Table("topic").Cols("name").
+			Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id").
+			Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil {
+			return err
+		}
+		log.Info("Updating 'topics' field for repository with id = %v", repoID)
+		if _, err := sess.ID(repoID).Cols("topics").
+			Update(&models.Repository{Topics: topicNames}); err != nil {
+			return err
+		}
+	}
+	if err := sess.Commit(); err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/models/topic.go b/models/topic.go
index 3b1737f8a..247aac5ff 100644
--- a/models/topic.go
+++ b/models/topic.go
@@ -6,6 +6,7 @@ package models
 
 import (
 	"fmt"
+	"regexp"
 	"strings"
 
 	"code.gitea.io/gitea/modules/util"
@@ -20,6 +21,8 @@ func init() {
 	)
 }
 
+var topicPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`)
+
 // Topic represents a topic of repositories
 type Topic struct {
 	ID          int64
@@ -51,6 +54,11 @@ func (err ErrTopicNotExist) Error() string {
 	return fmt.Sprintf("topic is not exist [name: %s]", err.Name)
 }
 
+// ValidateTopic checks topics by length and match pattern rules
+func ValidateTopic(topic string) bool {
+	return len(topic) <= 35 && topicPattern.MatchString(topic)
+}
+
 // GetTopicByName retrieves topic by name
 func GetTopicByName(name string) (*Topic, error) {
 	var topic Topic
@@ -182,6 +190,13 @@ func SaveTopics(repoID int64, topicNames ...string) error {
 		}
 	}
 
+	topicNames = topicNames[:0]
+	if err := sess.Table("topic").Cols("name").
+		Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id").
+		Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil {
+		return err
+	}
+
 	if _, err := sess.ID(repoID).Cols("topics").Update(&Repository{
 		Topics: topicNames,
 	}); err != nil {
diff --git a/models/topic_test.go b/models/topic_test.go
index 472f4e52d..ef374e557 100644
--- a/models/topic_test.go
+++ b/models/topic_test.go
@@ -55,3 +55,16 @@ func TestAddTopic(t *testing.T) {
 	assert.NoError(t, err)
 	assert.EqualValues(t, 2, len(topics))
 }
+
+func TestTopicValidator(t *testing.T) {
+	assert.True(t, ValidateTopic("12345"))
+	assert.True(t, ValidateTopic("2-test"))
+	assert.True(t, ValidateTopic("test-3"))
+	assert.True(t, ValidateTopic("first"))
+	assert.True(t, ValidateTopic("second-test-topic"))
+	assert.True(t, ValidateTopic("third-project-topic-with-max-length"))
+
+	assert.False(t, ValidateTopic("$fourth-test,topic"))
+	assert.False(t, ValidateTopic("-fifth-test-topic"))
+	assert.False(t, ValidateTopic("sixth-go-project-topic-with-excess-length"))
+}
diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini
index 8cf6111c6..21ae775e4 100644
--- a/options/locale/locale_en-US.ini
+++ b/options/locale/locale_en-US.ini
@@ -1167,6 +1167,8 @@ branch.protected_deletion_failed = Branch '%s' is protected. It cannot be delete
 
 topic.manage_topics = Manage Topics
 topic.done = Done
+topic.count_prompt = You can't select more than 25 topics
+topic.format_prompt = Topics must start with a letter or number, can include hyphens(-) and must be no more than 35 characters long
 
 [org]
 org_name_holder = Organization Name
diff --git a/public/js/index.js b/public/js/index.js
index e98a3fe6d..823dd8766 100644
--- a/public/js/index.js
+++ b/public/js/index.js
@@ -2336,8 +2336,10 @@ function initTopicbar() {
         }).done(function() {
             editDiv.hide();
             viewDiv.show();
+        }).fail(function(xhr) {
+            alert(xhr.responseJSON.message)
         })
-    })
+    });
 
     $('#topic_edit .dropdown').dropdown({
         allowAdditions: true,
diff --git a/routers/repo/topic.go b/routers/repo/topic.go
index 2a43d53ff..63fcf793f 100644
--- a/routers/repo/topic.go
+++ b/routers/repo/topic.go
@@ -12,8 +12,8 @@ import (
 	"code.gitea.io/gitea/modules/log"
 )
 
-// TopicPost response for creating repository
-func TopicPost(ctx *context.Context) {
+// TopicsPost response for creating repository
+func TopicsPost(ctx *context.Context) {
 	if ctx.User == nil {
 		ctx.JSON(403, map[string]interface{}{
 			"message": "Only owners could change the topics.",
@@ -27,6 +27,37 @@ func TopicPost(ctx *context.Context) {
 		topics = strings.Split(topicsStr, ",")
 	}
 
+	invalidTopics := make([]string, 0)
+	i := 0
+	for _, topic := range topics {
+		topic = strings.TrimSpace(strings.ToLower(topic))
+		// ignore empty string
+		if len(topic) > 0 {
+			topics[i] = topic
+			i++
+		}
+		if !models.ValidateTopic(topic) {
+			invalidTopics = append(invalidTopics, topic)
+		}
+	}
+	topics = topics[:i]
+
+	if len(topics) > 25 {
+		ctx.JSON(422, map[string]interface{}{
+			"invalidTopics": topics[:0],
+			"message":       ctx.Tr("repo.topic.count_prompt"),
+		})
+		return
+	}
+
+	if len(invalidTopics) > 0 {
+		ctx.JSON(422, map[string]interface{}{
+			"invalidTopics": invalidTopics,
+			"message":       ctx.Tr("repo.topic.format_prompt"),
+		})
+		return
+	}
+
 	err := models.SaveTopics(ctx.Repo.Repository.ID, topics...)
 	if err != nil {
 		log.Error(2, "SaveTopics failed: %v", err)
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index 250b98507..1eefbf1b6 100644
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -626,7 +626,7 @@ func RegisterRoutes(m *macaron.Macaron) {
 	}, context.RepoAssignment(), context.UnitTypes(), context.LoadRepoUnits(), context.CheckUnit(models.UnitTypeReleases))
 
 	m.Group("/:username/:reponame", func() {
-		m.Post("/topics", repo.TopicPost)
+		m.Post("/topics", repo.TopicsPost)
 	}, context.RepoAssignment(), reqRepoAdmin)
 
 	m.Group("/:username/:reponame", func() {