add feature full text search

phachon · Feb 16, 2020 · 3c9c94a · 3c9c94a
1 parent 1877c8d
commit 3c9c94a
Show file tree

Hide file tree

Showing 98 changed files with 602,864 additions and 71 deletions.
diff --git a/app/bootstrap.go b/app/bootstrap.go
@@ -6,8 +6,10 @@ import (
 	"github.com/astaxie/beego"
 	"github.com/astaxie/beego/logs"
 	"github.com/fatih/color"
+	"github.com/go-ego/riot/types"
 	"github.com/phachon/mm-wiki/app/models"
 	"github.com/phachon/mm-wiki/app/utils"
+	"github.com/phachon/mm-wiki/app/work"
 	"github.com/phachon/mm-wiki/global"
 	"github.com/snail007/go-activerecord/mysql"
 	"log"
@@ -18,7 +20,6 @@ import (
 )
 
 var (
-
 	defaultConf = "conf/mm-wiki.conf"
 
 	confPath = flag.String("conf", "", "please set mm-wiki conf path")
@@ -44,6 +45,8 @@ var (
 	ImageAbsDir = ""
 
 	AttachmentAbsDir = ""
+
+	SearchIndexAbsDir = ""
 )
 
 func init() {
@@ -53,6 +56,8 @@ func init() {
 	initDB()
 	checkUpgrade()
 	initDocumentDir()
+	initSearch()
+	initWork()
 	StartTime = time.Now().Unix()
 }
 
@@ -180,36 +185,24 @@ func initDocumentDir() {
 	imagesAbsDir := path.Join(documentAbsDir, "images")
 	// attachment save dir
 	attachmentAbsDir := path.Join(documentAbsDir, "attachment")
+	// search index dir
+	searchIndexAbsDir := path.Join(documentAbsDir, "search-index")
 
 	MarkdownAbsDir = markDownAbsDir
 	ImageAbsDir = imagesAbsDir
 	AttachmentAbsDir = attachmentAbsDir
-
-	// create markdown dir
-	ok, _ = utils.File.PathIsExists(markDownAbsDir)
-	if !ok {
-		err := os.Mkdir(markDownAbsDir, 0777)
-		if err != nil {
-			logs.Error("create document markdown dir " + markDownAbsDir + " error!")
-			os.Exit(1)
-		}
-	}
-	// create image dir
-	ok, _ = utils.File.PathIsExists(imagesAbsDir)
-	if !ok {
-		err := os.Mkdir(imagesAbsDir, 0777)
-		if err != nil {
-			logs.Error("create document image dir " + imagesAbsDir + " error!")
-			os.Exit(1)
-		}
-	}
-	// create attachment dir
-	ok, _ = utils.File.PathIsExists(attachmentAbsDir)
-	if !ok {
-		err := os.Mkdir(attachmentAbsDir, 0777)
-		if err != nil {
-			logs.Error("create document attachment dir " + attachmentAbsDir + " error!")
-			os.Exit(1)
+	SearchIndexAbsDir = searchIndexAbsDir
+
+	dirList := []string{MarkdownAbsDir, ImageAbsDir, AttachmentAbsDir, SearchIndexAbsDir}
+	// create dir
+	for _, dir := range dirList {
+		ok, _ = utils.File.PathIsExists(dir)
+		if !ok {
+			err := os.Mkdir(dir, 0777)
+			if err != nil {
+				logs.Error("create document dir "+dir+" error=%s", err.Error())
+				os.Exit(1)
+			}
 		}
 	}
 
@@ -252,3 +245,39 @@ func checkUpgrade() {
 		os.Exit(0)
 	}
 }
+
+// initSearch checks that the tokenizer dictionary and the stop-token file
+// exist under RootDir/docs/search_dict, logging an error and exiting with
+// status 1 if either is missing, and then initializes global.DocSearcher with
+// SearchIndexAbsDir as its store folder.
+func initSearch() {
+	dictFile := filepath.Join(RootDir, "docs/search_dict/dictionary.txt")
+	stopTokenFile := filepath.Join(RootDir, "docs/search_dict/stop_tokens.txt")
+
+	// Both files are required; the first missing one aborts startup.
+	required := []struct {
+		label string
+		file  string
+	}{
+		{"search dict file ", dictFile},
+		{"search stop dict file ", stopTokenFile},
+	}
+	for _, item := range required {
+		if exists, _ := utils.File.PathIsExists(item.file); !exists {
+			logs.Error(item.label + item.file + " is not exists!")
+			os.Exit(1)
+		}
+	}
+
+	// An explicit dictionary file is used; the author left the alternative
+	// `GseDict: "zh"` setting commented out.
+	engineOpts := types.EngineOpts{
+		UseStore:      true,
+		StoreFolder:   SearchIndexAbsDir,
+		Using:         3,
+		GseDict:       dictFile,
+		StopTokenFile: stopTokenFile,
+		IndexerOpts: &types.IndexerOpts{
+			IndexType: types.LocsIndex,
+		},
+	}
+	global.DocSearcher.Init(engineOpts)
+}
+
+// initWork starts the document search-index worker. The interval, in seconds,
+// is read from the "search::interval_time" config key; a value of 0 (which is
+// also what remains when the key cannot be read, since the error is ignored)
+// is replaced by 30 seconds.
+func initWork() {
+	const defaultIntervalSeconds = 30
+
+	// search index worker
+	seconds, _ := beego.AppConfig.Int64("search::interval_time")
+	if seconds == 0 {
+		seconds = defaultIntervalSeconds
+	}
+	interval := time.Duration(seconds) * time.Second
+	work.InitDocSearchIndexWork(interval)
+}
diff --git a/app/controllers/main.go b/app/controllers/main.go
@@ -1,7 +1,11 @@
 package controllers
 
 import (
+	"github.com/go-ego/riot/types"
 	"github.com/phachon/mm-wiki/app/models"
+	"github.com/phachon/mm-wiki/app/utils"
+	"github.com/phachon/mm-wiki/global"
+	"strings"
 )
 
 type MainController struct {
@@ -137,35 +141,86 @@ func (this *MainController) About() {
 	this.viewLayout("main/about", "default")
 }
 
+// Search handles document search by title or by content.
+// It reads the "keyword" and "search_type" (default "content") request
+// parameters. An empty keyword renders the search page with a count of 0.
+// Otherwise it collects the IDs of public spaces and of the spaces that have a
+// space-user row for this.UserId, looks documents up by name (search_type
+// "title") or through global.DocSearcher (any other value), keeps only
+// documents from the collected spaces, and renders "main/search".
+// NOTE(review): the error branches call this.ViewError without returning;
+// presumably ViewError aborts the request — confirm.
 func (this *MainController) Search() {
 
-	page, _ := this.GetInt("page", 1)
-	documentName := this.GetString("document_name", "")
-	number, _ := this.GetRangeInt("number", 20, 10, 100)
-	limit := (page - 1) * number
+	keyword := strings.TrimSpace(this.GetString("keyword", ""))
+	searchType := this.GetString("search_type", "content")
 
+	this.Data["search_type"] = searchType
+	this.Data["keyword"] = keyword
+	this.Data["count"] = 0
+	if keyword == "" {
+		this.viewLayout("main/search", "default")
+		return
+	}
 	var documents = []map[string]string{}
 	var err error
-	var count int64
-
-	if documentName != "" {
-		count, err = models.DocumentModel.CountDocumentsLikeName(documentName)
-		if err != nil {
-			this.ErrorLog("搜索文档总数出错：" + err.Error())
-			this.ViewError("搜索文档错误！")
+	// Collect the spaces this user has permission to access
+	publicSpaces, err := models.SpaceModel.GetSpacesByVisitLevel(models.Space_VisitLevel_Public)
+	if err != nil {
+		this.ErrorLog("搜索文档列表获取用户空间权限出错：" + err.Error())
+		this.ViewError("搜索文档错误！")
+	}
+	spaceUsers, err := models.SpaceUserModel.GetSpaceUsersByUserId(this.UserId)
+	if err != nil {
+		this.ErrorLog("搜索文档列表获取用户空间权限出错：" + err.Error())
+		this.ViewError("搜索文档错误！")
+	}
+	spaceIdsMap := make(map[string]bool)
+	for _, publicSpace := range publicSpaces {
+		spaceIdsMap[publicSpace["space_id"]] = true
+	}
+	for _, spaceUser := range spaceUsers {
+		if _, ok := spaceIdsMap[spaceUser["space_id"]]; !ok {
+			spaceIdsMap[spaceUser["space_id"]] = true
 		}
-		if count > 0 {
-			documents, err = models.DocumentModel.GetDocumentsByLikeNameAndLimit(documentName, limit, number)
-			if err != nil {
-				this.ErrorLog("搜索文档列表出错：" + err.Error())
-				this.ViewError("搜索文档错误！")
+	}
+	searchDocContents := make(map[string]string)
+	// Content search is the default
+	if searchType == "title" {
+		documents, err = models.DocumentModel.GetDocumentsByLikeName(keyword)
+	} else {
+		searchRes := global.DocSearcher.SearchDoc(types.SearchReq{Text: keyword})
+		searchDocIds := []string{}
+		for _, searchDoc := range searchRes.Docs {
+			if len(searchDoc.TokenSnippetLocs) == 0 {
+				continue
 			}
+			docId := searchDoc.DocId
+			content := searchDoc.Content
+			locIndex := searchDoc.TokenSnippetLocs[0]
+			searchContent := utils.Misc.SubStrUnicodeBySubStrIndex(content, keyword, locIndex, 30, 30)
+			searchDocContents[docId] = searchContent
+			searchDocIds = append(searchDocIds, docId)
 		}
+		documents, err = models.DocumentModel.GetDocumentsByDocumentIds(searchDocIds)
+	}
+	if err != nil {
+		this.ErrorLog("搜索文档出错：" + err.Error())
+		this.ViewError("搜索文档错误！")
+	}
+	// Drop documents in spaces the user has no permission for
+	realDocuments := []map[string]string{}
+	for _, document := range documents {
+		spaceId, _ := document["space_id"]
+		documentId, _ := document["document_id"]
+		if _, ok := spaceIdsMap[spaceId]; !ok {
+			continue
+		}
+		if searchType != "title" {
+			searchContent, ok := searchDocContents[documentId]
+			if !ok || searchContent == "" {
+				continue
+			}
+			document["search_content"] = searchContent
+		}
+		realDocuments = append(realDocuments, document)
 	}
 
-	this.Data["document_name"] = documentName
-	this.Data["documents"] = documents
-	this.Data["count"] = count
-	this.SetPaginator(number, count)
+	this.Data["search_type"] = searchType
+	this.Data["keyword"] = keyword
+	this.Data["documents"] = realDocuments
+	this.Data["count"] = len(realDocuments)
 	this.viewLayout("main/search", "default")
 }
diff --git a/app/models/document.go b/app/models/document.go
@@ -528,6 +528,19 @@ func (d *Document) GetAllDocumentsByDocumentIds(documentIds []string) (documents
 	return
 }
 
+// GetAllDocuments returns every row of the document table whose is_delete
+// column equals Document_Delete_False, or the query error.
+func (d *Document) GetAllDocuments() (documents []map[string]string, err error) {
+	db := G.DB()
+	query := db.AR().From(Table_Document_Name).Where(map[string]interface{}{
+		"is_delete": Document_Delete_False,
+	})
+	var rs *mysql.ResultSet
+	if rs, err = db.Query(query); err != nil {
+		return nil, err
+	}
+	return rs.Rows(), nil
+}
+
 func (d *Document) GetParentDocumentsByDocument(document map[string]string) (parentDocuments []map[string]string, pageFile string, err error) {
 
 	if document["parent_id"] == "0" {
@@ -629,3 +642,18 @@ func (d *Document) GetDocumentGroupEditUserId() (documents []map[string]string,
 	documents = rs.Rows()
 	return
 }
+
+// GetDocumentContentByDocument resolves the page file of doc through
+// GetParentDocumentsByDocument and then reads that file's content.
+// It returns the content, the page file path, and the first error encountered;
+// when resolving the page file fails, content is left empty.
+func (d *Document) GetDocumentContentByDocument(doc map[string]string) (content string, pageFile string, err error) {
+	// Resolve the page file first; the content is only read when that succeeds.
+	_, pageFile, err = DocumentModel.GetParentDocumentsByDocument(doc)
+	if err == nil {
+		content, err = utils.Document.GetContentByPageFile(pageFile)
+	}
+	return content, pageFile, err
+}
diff --git a/app/models/space.go b/app/models/space.go
@@ -264,6 +264,23 @@ func (s *Space) GetSpaces() (spaces []map[string]string, err error) {
 	return
 }
 
+// GetSpacesByVisitLevel returns the rows of the space table whose visit_level
+// equals visitLevel and whose is_delete column equals Space_Delete_False, or
+// the query error.
+func (s *Space) GetSpacesByVisitLevel(visitLevel string) (spaces []map[string]string, err error) {
+	db := G.DB()
+	conditions := map[string]interface{}{
+		"visit_level": visitLevel,
+		"is_delete":   Space_Delete_False,
+	}
+	var rs *mysql.ResultSet
+	if rs, err = db.Query(db.AR().From(Table_Space_Name).Where(conditions)); err != nil {
+		return nil, err
+	}
+	return rs.Rows(), nil
+}
+
 // get space count
 func (s *Space) CountSpaces() (count int64, err error) {
 

diff --git a/app/utils/misc.go b/app/utils/misc.go
@@ -150,3 +150,62 @@ func (m *misc) Page(total, page, pagesize int, url string, args ...interface{})
 	}
 	return ""
 }
+
+// GetStrUnicodeIndex returns the rune (character) index of the first
+// occurrence of substr in str, or -1 when substr does not occur.
+func (m *misc) GetStrUnicodeIndex(str string, substr string) int {
+	// strings.Index reports a byte offset; convert it to a rune index.
+	byteIndex := strings.Index(str, substr)
+	if byteIndex < 0 {
+		return -1
+	}
+	return m.GetStrUnicodeIndexByByteIndex(str, byteIndex)
+}
+
+// GetStrUnicodeIndexByByteIndex converts a byte offset into str to the
+// corresponding rune (character) index, i.e. the number of runes that precede
+// that byte offset.
+//
+// It returns -1 when subStrByteIndex is negative or does not address a byte
+// inside str (subStrByteIndex >= len(str)).
+func (m *misc) GetStrUnicodeIndexByByteIndex(str string, subStrByteIndex int) int {
+	// Negative offsets are rejected too: slicing with one would panic.
+	if subStrByteIndex < 0 || subStrByteIndex >= len(str) {
+		return -1
+	}
+	// The rune count of the prefix is the rune index of the addressed byte.
+	// The string is sliced directly; no []byte round-trip is needed.
+	return len([]rune(str[:subStrByteIndex]))
+}
+
+// SubStrUnicode returns the part of str around the first occurrence of subStr:
+// up to preLen runes before it, subStr itself, and up to sufLen runes after it.
+// The window is clamped to the bounds of str, so the result may be shorter.
+// If subStr does not occur in str, the result is a (possibly empty) prefix of
+// str.
+func (m *misc) SubStrUnicode(str string, subStr string, preLen int, sufLen int) string {
+	strRune := []rune(str)
+	count := len(strRune)
+	subStrLen := len([]rune(subStr))
+	subStrUnicodeIndex := m.GetStrUnicodeIndex(str, subStr)
+
+	startIndex := 0
+	if subStrUnicodeIndex-preLen > 0 {
+		startIndex = subStrUnicodeIndex - preLen
+	}
+	// endIndex is an exclusive bound, so its upper limit is count (not
+	// count-1, which would drop the last rune and panic on an empty str).
+	endIndex := count
+	if subStrUnicodeIndex+subStrLen+sufLen < count {
+		endIndex = subStrUnicodeIndex + subStrLen + sufLen
+	}
+	// Keep the slice expression valid even for negative preLen/sufLen.
+	if startIndex > count {
+		startIndex = count
+	}
+	if endIndex < startIndex {
+		endIndex = startIndex
+	}
+	return string(strRune[startIndex:endIndex])
+}
+
+// SubStrUnicodeBySubStrIndex returns the part of str around subStr when the
+// byte offset of subStr in str is already known: up to preLen runes before it,
+// subStr itself, and up to sufLen runes after it. The window is clamped to the
+// bounds of str, so the result may be shorter.
+//
+// subStrIndex is the byte offset of subStr in str. When that offset does not
+// address a byte inside str, the result is a (possibly empty) prefix of str.
+func (m *misc) SubStrUnicodeBySubStrIndex(str string, subStr string, subStrIndex int, preLen int, sufLen int) string {
+	strRune := []rune(str)
+	count := len(strRune)
+	subStrLen := len([]rune(subStr))
+	subStrUnicodeIndex := m.GetStrUnicodeIndexByByteIndex(str, subStrIndex)
+
+	startIndex := 0
+	if subStrUnicodeIndex-preLen > 0 {
+		startIndex = subStrUnicodeIndex - preLen
+	}
+	// endIndex is an exclusive bound, so its upper limit is count (not
+	// count-1, which would drop the last rune and panic on an empty str).
+	endIndex := count
+	if subStrUnicodeIndex+subStrLen+sufLen < count {
+		endIndex = subStrUnicodeIndex + subStrLen + sufLen
+	}
+	// Keep the slice expression valid even for negative preLen/sufLen.
+	if startIndex > count {
+		startIndex = count
+	}
+	if endIndex < startIndex {
+		endIndex = startIndex
+	}
+	return string(strRune[startIndex:endIndex])
+}