Skip to content

Commit

Permalink
add feature full text search
Browse files Browse the repository at this point in the history
  • Loading branch information
phachon committed Feb 16, 2020
1 parent 1877c8d commit 3c9c94a
Show file tree
Hide file tree
Showing 98 changed files with 602,864 additions and 71 deletions.
83 changes: 56 additions & 27 deletions app/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ import (
"github.com/astaxie/beego"
"github.com/astaxie/beego/logs"
"github.com/fatih/color"
"github.com/go-ego/riot/types"
"github.com/phachon/mm-wiki/app/models"
"github.com/phachon/mm-wiki/app/utils"
"github.com/phachon/mm-wiki/app/work"
"github.com/phachon/mm-wiki/global"
"github.com/snail007/go-activerecord/mysql"
"log"
Expand All @@ -18,7 +20,6 @@ import (
)

var (

defaultConf = "conf/mm-wiki.conf"

confPath = flag.String("conf", "", "please set mm-wiki conf path")
Expand All @@ -44,6 +45,8 @@ var (
ImageAbsDir = ""

AttachmentAbsDir = ""

SearchIndexAbsDir = ""
)

func init() {
Expand All @@ -53,6 +56,8 @@ func init() {
initDB()
checkUpgrade()
initDocumentDir()
initSearch()
initWork()
StartTime = time.Now().Unix()
}

Expand Down Expand Up @@ -180,36 +185,24 @@ func initDocumentDir() {
imagesAbsDir := path.Join(documentAbsDir, "images")
// attachment save dir
attachmentAbsDir := path.Join(documentAbsDir, "attachment")
// search index dir
searchIndexAbsDir := path.Join(documentAbsDir, "search-index")

MarkdownAbsDir = markDownAbsDir
ImageAbsDir = imagesAbsDir
AttachmentAbsDir = attachmentAbsDir

// create markdown dir
ok, _ = utils.File.PathIsExists(markDownAbsDir)
if !ok {
err := os.Mkdir(markDownAbsDir, 0777)
if err != nil {
logs.Error("create document markdown dir " + markDownAbsDir + " error!")
os.Exit(1)
}
}
// create image dir
ok, _ = utils.File.PathIsExists(imagesAbsDir)
if !ok {
err := os.Mkdir(imagesAbsDir, 0777)
if err != nil {
logs.Error("create document image dir " + imagesAbsDir + " error!")
os.Exit(1)
}
}
// create attachment dir
ok, _ = utils.File.PathIsExists(attachmentAbsDir)
if !ok {
err := os.Mkdir(attachmentAbsDir, 0777)
if err != nil {
logs.Error("create document attachment dir " + attachmentAbsDir + " error!")
os.Exit(1)
SearchIndexAbsDir = searchIndexAbsDir

dirList := []string{MarkdownAbsDir, ImageAbsDir, AttachmentAbsDir, SearchIndexAbsDir}
// create dir
for _, dir := range dirList {
ok, _ = utils.File.PathIsExists(dir)
if !ok {
err := os.Mkdir(dir, 0777)
if err != nil {
logs.Error("create document dir "+dir+" error=%s", err.Error())
os.Exit(1)
}
}
}

Expand Down Expand Up @@ -252,3 +245,39 @@ func checkUpgrade() {
os.Exit(0)
}
}

// initSearch verifies that the search dictionary files exist under RootDir and
// then initializes the global document searcher with an on-disk index stored in
// SearchIndexAbsDir. The process exits with status 1 if either file is missing.
func initSearch() {
	gseFile := filepath.Join(RootDir, "docs/search_dict/dictionary.txt")
	stopFile := filepath.Join(RootDir, "docs/search_dict/stop_tokens.txt")

	// Both dictionary files are mandatory; check them in a fixed order so the
	// first missing one is the one reported.
	requiredFiles := []struct {
		label string
		path  string
	}{
		{label: "search dict file ", path: gseFile},
		{label: "search stop dict file ", path: stopFile},
	}
	for _, required := range requiredFiles {
		if exists, _ := utils.File.PathIsExists(required.path); !exists {
			logs.Error(required.label + required.path + " is not exists!")
			os.Exit(1)
		}
	}

	// The engine reads its segmentation dictionary from gseFile (a custom
	// dictionary file rather than the built-in "zh" one) and its stop tokens
	// from stopFile.
	engineOpts := types.EngineOpts{
		UseStore:      true,
		StoreFolder:   SearchIndexAbsDir,
		Using:         3,
		GseDict:       gseFile,
		StopTokenFile: stopFile,
		IndexerOpts: &types.IndexerOpts{
			IndexType: types.LocsIndex,
		},
	}
	global.DocSearcher.Init(engineOpts)
}

// initWork starts the background document search index worker.
//
// The interval is read, in seconds, from the "search::interval_time" config
// key. A missing, unparsable, zero or negative value falls back to 30 seconds,
// so the worker is never handed a non-positive duration.
func initWork() {
	const defaultIntervalSeconds = 30

	// search index worker
	intervalTime, err := beego.AppConfig.Int64("search::interval_time")
	if err != nil || intervalTime <= 0 {
		intervalTime = defaultIntervalSeconds
	}
	work.InitDocSearchIndexWork(time.Duration(intervalTime) * time.Second)
}
95 changes: 75 additions & 20 deletions app/controllers/main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package controllers

import (
"github.com/go-ego/riot/types"
"github.com/phachon/mm-wiki/app/models"
"github.com/phachon/mm-wiki/app/utils"
"github.com/phachon/mm-wiki/global"
"strings"
)

type MainController struct {
Expand Down Expand Up @@ -137,35 +141,86 @@ func (this *MainController) About() {
this.viewLayout("main/about", "default")
}

// 搜索,支持根据标题和内容搜索
func (this *MainController) Search() {

page, _ := this.GetInt("page", 1)
documentName := this.GetString("document_name", "")
number, _ := this.GetRangeInt("number", 20, 10, 100)
limit := (page - 1) * number
keyword := strings.TrimSpace(this.GetString("keyword", ""))
searchType := this.GetString("search_type", "content")

this.Data["search_type"] = searchType
this.Data["keyword"] = keyword
this.Data["count"] = 0
if keyword == "" {
this.viewLayout("main/search", "default")
return
}
var documents = []map[string]string{}
var err error
var count int64

if documentName != "" {
count, err = models.DocumentModel.CountDocumentsLikeName(documentName)
if err != nil {
this.ErrorLog("搜索文档总数出错:" + err.Error())
this.ViewError("搜索文档错误!")
// 获取该用户有权限的空间
publicSpaces, err := models.SpaceModel.GetSpacesByVisitLevel(models.Space_VisitLevel_Public)
if err != nil {
this.ErrorLog("搜索文档列表获取用户空间权限出错:" + err.Error())
this.ViewError("搜索文档错误!")
}
spaceUsers, err := models.SpaceUserModel.GetSpaceUsersByUserId(this.UserId)
if err != nil {
this.ErrorLog("搜索文档列表获取用户空间权限出错:" + err.Error())
this.ViewError("搜索文档错误!")
}
spaceIdsMap := make(map[string]bool)
for _, publicSpace := range publicSpaces {
spaceIdsMap[publicSpace["space_id"]] = true
}
for _, spaceUser := range spaceUsers {
if _, ok := spaceIdsMap[spaceUser["space_id"]]; !ok {
spaceIdsMap[spaceUser["space_id"]] = true
}
if count > 0 {
documents, err = models.DocumentModel.GetDocumentsByLikeNameAndLimit(documentName, limit, number)
if err != nil {
this.ErrorLog("搜索文档列表出错:" + err.Error())
this.ViewError("搜索文档错误!")
}
searchDocContents := make(map[string]string)
// 默认根据内容搜索
if searchType == "title" {
documents, err = models.DocumentModel.GetDocumentsByLikeName(keyword)
} else {
searchRes := global.DocSearcher.SearchDoc(types.SearchReq{Text: keyword})
searchDocIds := []string{}
for _, searchDoc := range searchRes.Docs {
if len(searchDoc.TokenSnippetLocs) == 0 {
continue
}
docId := searchDoc.DocId
content := searchDoc.Content
locIndex := searchDoc.TokenSnippetLocs[0]
searchContent := utils.Misc.SubStrUnicodeBySubStrIndex(content, keyword, locIndex, 30, 30)
searchDocContents[docId] = searchContent
searchDocIds = append(searchDocIds, docId)
}
documents, err = models.DocumentModel.GetDocumentsByDocumentIds(searchDocIds)
}
if err != nil {
this.ErrorLog("搜索文档出错:" + err.Error())
this.ViewError("搜索文档错误!")
}
// 过滤一下没权限的空间
realDocuments := []map[string]string{}
for _, document := range documents {
spaceId, _ := document["space_id"]
documentId, _ := document["document_id"]
if _, ok := spaceIdsMap[spaceId]; !ok {
continue
}
if searchType != "title" {
searchContent, ok := searchDocContents[documentId]
if !ok || searchContent == "" {
continue
}
document["search_content"] = searchContent
}
realDocuments = append(realDocuments, document)
}

this.Data["document_name"] = documentName
this.Data["documents"] = documents
this.Data["count"] = count
this.SetPaginator(number, count)
this.Data["search_type"] = searchType
this.Data["keyword"] = keyword
this.Data["documents"] = realDocuments
this.Data["count"] = len(realDocuments)
this.viewLayout("main/search", "default")
}
28 changes: 28 additions & 0 deletions app/models/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,19 @@ func (d *Document) GetAllDocumentsByDocumentIds(documentIds []string) (documents
return
}

// GetAllDocuments returns every row of the document table whose is_delete
// column equals Document_Delete_False. On a query error it returns a nil slice
// together with that error.
func (d *Document) GetAllDocuments() (documents []map[string]string, err error) {
	db := G.DB()
	notDeleted := map[string]interface{}{
		"is_delete": Document_Delete_False,
	}
	query := db.AR().From(Table_Document_Name).Where(notDeleted)

	var result *mysql.ResultSet
	if result, err = db.Query(query); err != nil {
		return nil, err
	}
	return result.Rows(), nil
}

func (d *Document) GetParentDocumentsByDocument(document map[string]string) (parentDocuments []map[string]string, pageFile string, err error) {

if document["parent_id"] == "0" {
Expand Down Expand Up @@ -629,3 +642,18 @@ func (d *Document) GetDocumentGroupEditUserId() (documents []map[string]string,
documents = rs.Rows()
return
}

// GetDocumentContentByDocument resolves the page file of the given document
// record and reads that file's content.
//
// It returns the content, the page file path, and the first error hit. When
// resolving the page file fails, content is empty and pageFile is whatever the
// lookup returned.
func (d *Document) GetDocumentContentByDocument(doc map[string]string) (content string, pageFile string, err error) {
	// Resolve the page file path from the document record.
	if _, pageFile, err = DocumentModel.GetParentDocumentsByDocument(doc); err != nil {
		return "", pageFile, err
	}
	// Read the document content from the page file.
	content, err = utils.Document.GetContentByPageFile(pageFile)
	return content, pageFile, err
}
17 changes: 17 additions & 0 deletions app/models/space.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,23 @@ func (s *Space) GetSpaces() (spaces []map[string]string, err error) {
return
}

// GetSpacesByVisitLevel returns the rows of the space table whose visit_level
// equals visitLevel and whose is_delete column equals Space_Delete_False. On a
// query error it returns a nil slice together with that error.
func (s *Space) GetSpacesByVisitLevel(visitLevel string) (spaces []map[string]string, err error) {
	db := G.DB()
	conditions := map[string]interface{}{
		"visit_level": visitLevel,
		"is_delete":   Space_Delete_False,
	}
	query := db.AR().From(Table_Space_Name).Where(conditions)

	var result *mysql.ResultSet
	if result, err = db.Query(query); err != nil {
		return nil, err
	}
	return result.Rows(), nil
}

// get space count
func (s *Space) CountSpaces() (count int64, err error) {

Expand Down
59 changes: 59 additions & 0 deletions app/utils/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,62 @@ func (m *misc) Page(total, page, pagesize int, url string, args ...interface{})
}
return ""
}

// GetStrUnicodeIndex returns the position, counted in Unicode characters
// (runes) rather than bytes, of the first occurrence of substr in str.
// It returns -1 when substr does not occur in str.
func (m *misc) GetStrUnicodeIndex(str string, substr string) int {
	// strings.Index reports a byte offset; convert it to a rune offset.
	byteIndex := strings.Index(str, substr)
	if byteIndex < 0 {
		return -1
	}
	return m.GetStrUnicodeIndexByByteIndex(str, byteIndex)
}

// GetStrUnicodeIndexByByteIndex converts a byte offset into str to the
// corresponding position counted in Unicode characters (runes).
//
// subStrByteIndex must address a byte inside str. For any offset outside
// [0, len(str)-1], including negative offsets, which previously caused a
// slice-bounds panic, it returns -1.
func (m *misc) GetStrUnicodeIndexByByteIndex(str string, subStrByteIndex int) int {
	if subStrByteIndex < 0 || subStrByteIndex > len(str)-1 {
		return -1
	}
	// The number of runes that precede the byte offset is the rune position.
	return len([]rune(str[:subStrByteIndex]))
}

// SubStrUnicode returns a snippet of str around the first occurrence of
// subStr: up to preLen characters before it, subStr itself, and up to sufLen
// characters after it. All lengths are counted in Unicode characters (runes),
// and the window is clamped to the bounds of str.
//
// If subStr does not occur in str the window is anchored at the start of str,
// so the leading characters of str are returned. An empty str yields "".
func (m *misc) SubStrUnicode(str string, subStr string, preLen int, sufLen int) string {
	strRune := []rune(str)
	count := len(strRune)
	subStrLen := len([]rune(subStr))

	subStrUnicodeIndex := m.GetStrUnicodeIndex(str, subStr)
	if subStrUnicodeIndex < 0 {
		// Not found: fall back to a snippet taken from the start.
		subStrUnicodeIndex = 0
	}

	// Clamp the window to [0, count]. endIndex is an exclusive bound, so the
	// largest valid value is count (not count-1, which dropped the last rune).
	startIndex := subStrUnicodeIndex - preLen
	if startIndex < 0 {
		startIndex = 0
	}
	if startIndex > count {
		startIndex = count
	}
	endIndex := subStrUnicodeIndex + subStrLen + sufLen
	if endIndex > count {
		endIndex = count
	}
	if endIndex < startIndex {
		// Only reachable with negative preLen/sufLen; return an empty window
		// instead of panicking.
		endIndex = startIndex
	}
	return string(strRune[startIndex:endIndex])
}

// SubStrUnicodeBySubStrIndex returns a snippet of str around an occurrence of
// subStr whose byte offset is already known: up to preLen characters before
// it, subStr itself, and up to sufLen characters after it. preLen, sufLen and
// the resulting window are counted in Unicode characters (runes) and clamped
// to the bounds of str.
//
// subStrIndex is the byte offset of subStr within str. If it does not address
// a byte inside str (negative, or past the end) the window is anchored at the
// start of str. An empty str yields "".
func (m *misc) SubStrUnicodeBySubStrIndex(str string, subStr string, subStrIndex int, preLen int, sufLen int) string {
	strRune := []rune(str)
	count := len(strRune)
	subStrLen := len([]rune(subStr))

	// Convert the byte offset to a rune offset. Negative offsets are rejected
	// here so the conversion helper is never asked to slice with one.
	subStrUnicodeIndex := -1
	if subStrIndex >= 0 {
		subStrUnicodeIndex = m.GetStrUnicodeIndexByByteIndex(str, subStrIndex)
	}
	if subStrUnicodeIndex < 0 {
		// Invalid offset: fall back to a snippet taken from the start.
		subStrUnicodeIndex = 0
	}

	// Clamp the window to [0, count]. endIndex is an exclusive bound, so the
	// largest valid value is count (not count-1, which dropped the last rune).
	startIndex := subStrUnicodeIndex - preLen
	if startIndex < 0 {
		startIndex = 0
	}
	if startIndex > count {
		startIndex = count
	}
	endIndex := subStrUnicodeIndex + subStrLen + sufLen
	if endIndex > count {
		endIndex = count
	}
	if endIndex < startIndex {
		// Only reachable with negative preLen/sufLen; return an empty window
		// instead of panicking.
		endIndex = startIndex
	}
	return string(strRune[startIndex:endIndex])
}
Loading

0 comments on commit 3c9c94a

Please sign in to comment.