diff --git a/services/parse_service.go b/services/parse_service.go index fb49045..f3688b9 100644 --- a/services/parse_service.go +++ b/services/parse_service.go @@ -2,6 +2,8 @@ package services import ( "context" + "crypto/sha256" + "encoding/hex" "fmt" "io" "log" @@ -10,6 +12,8 @@ import ( "path" "regexp" mongo_client "stockbackend/clients/mongo" + "stockbackend/utils/helpers" + "strconv" "strings" "time" "unicode" @@ -17,8 +21,14 @@ import ( "github.com/cloudinary/cloudinary-go/v2" "github.com/cloudinary/cloudinary-go/v2/api/admin" "github.com/cloudinary/cloudinary-go/v2/api/uploader" + "github.com/extrame/xls" + "github.com/getsentry/sentry-go" "github.com/google/uuid" "github.com/robfig/cron/v3" + "github.com/wailsapp/mimetype" + "github.com/xuri/excelize/v2" + "go.mongodb.org/mongo-driver/mongo" + "go.uber.org/zap" "gopkg.in/mgo.v2/bson" ) @@ -64,17 +74,17 @@ func UpdateFunds() { } scheduler.Start() } else { - fmt.Println("Skipping the regular scheduler as debug mode is enabled.") - fmt.Println("Creating a scheduler that will run every 1 minute.") - jobID, err := scheduler.AddFunc("* * * * *", performUploadTask) - + // fmt.Println("Skipping the regular scheduler as debug mode is enabled.") + // fmt.Println("Creating a scheduler that will run every 1 minute.") + // jobID, err := scheduler.AddFunc("* * * * *", performUploadTask) + performUploadTask() // Need this here for proper Next time calculation - scheduler.Start() - if err != nil { - fmt.Println("An error occurred: the scheduler could not be added.") - } else { - fmt.Println("Next run time for Debug Scheduler:", scheduler.Entry(jobID).Next) - } + // scheduler.Start() + // if err != nil { + // // fmt.Println("An error occurred: the scheduler could not be added.") + // } else { + // // fmt.Println("Next run time for Debug Scheduler:", scheduler.Entry(jobID).Next) + // } } log.Println("Scheduler started") @@ -177,7 +187,7 @@ func extractPortfolioLinks(htmlContent string) []MFCOLLECTION { htmlContent = cleanHTMLContent(htmlContent) matches := re.FindAllStringSubmatch(htmlContent, -1) - fmt.Println("Total Matches Found:", len(matches)) // Debugging: Show total matches found + // fmt.Println("Total Matches Found:", len(matches)) // Debugging: Show total matches found var mfDetails []MFCOLLECTION for _, match := range matches { @@ -205,10 +215,10 @@ func extractPortfolioLinks(htmlContent string) []MFCOLLECTION { year: year, link: link, }) - // fmt.Println("Entire matched text:", entireText) - // fmt.Println("Month:", month) // Print extracted month - // fmt.Println("Year:", year) // Print extracted year - // fmt.Println("Link:", link) // Print the link + // // fmt.Println("Entire matched text:", entireText) + // // fmt.Println("Month:", month) // Print extracted month + // // fmt.Println("Year:", year) // Print extracted year + // // fmt.Println("Link:", link) // Print the link } } return mfDetails @@ -237,7 +247,63 @@ func uploadToCloudinary(fileURL string, mfData MFCOLLECTION) { return } + response, err := http.Get(secureUrl) + if err != nil { + log.Println("Error downloading xlsx file:", err) + return + } + bodyBytes, err := io.ReadAll(response.Body) + if err != nil { + log.Println("Error reading response body:", err) + return + } + + defer response.Body.Close() + + // Save the downloaded file to a temporary location + m := mimetype.Detect(bodyBytes) + // log.Println("Detected MIME type:", m.String()) + + // Assign file extension based on MIME type + var fileExt string + if m.Is("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") { + fileExt = ".xlsx" + } else if m.Is("application/vnd.ms-excel") { + fileExt = ".xls" + } else { + // log.Println("Downloaded file is not a supported Excel format (.xlsx or .xls).") + return + } + tempInputFile := fmt.Sprintf("%s%s", uuid.New().String(), fileExt) + err = os.WriteFile(tempInputFile, bodyBytes, 0644) + if err != nil { + log.Println("Error saving Excel file:", err) + return + } + defer os.Remove(tempInputFile) month := extractMonth(publicID) + + if fileExt == ".xlsx" { + // Process .xlsx file + err = processXLSXFile(tempInputFile, month) + if err != nil { + // fmt.Println("tempInputFile", tempInputFile) + log.Println("Error processing .xlsx file:", err) + return + } + } else if fileExt == ".xls" { + // Process .xls file + // fmt.Println("tempInputFile", tempInputFile) + err = processXLSFile(tempInputFile, month) + if err != nil { + log.Println("Error processing .xls file:", err) + return + } + } else { + log.Println("Unsupported file format:", fileExt) + return + } + fileUUID := uuid.New().String() document := bson.M{ "_id": fileUUID, @@ -257,6 +323,424 @@ func uploadToCloudinary(fileURL string, mfData MFCOLLECTION) { log.Printf("Document inserted successfully into MongoDB. UUID: %s\n", fileUUID) } +func processXLSXFile(tempInputFile, month string) error { + // Open the xlsx file using excelize + xlsxFile, err := excelize.OpenFile(tempInputFile) + if err != nil { + // fmt.Println("Error opening xlsx file:", err) + return fmt.Errorf("error opening xlsx file: %v", err) + } + defer xlsxFile.Close() + + // Get all sheet names + sheetList := xlsxFile.GetSheetList() + // stock := make([]map[string]interface{}, 0) + + for _, sheet := range sheetList { + rows, err := xlsxFile.GetRows(sheet) + if err != nil { + sentry.CaptureException(err) + zap.L().Error("Error reading rows from sheet", zap.String("sheet", sheet), zap.Error(err)) + continue + } + headerFound := false + headerMap := make(map[string]int) + stopExtracting := false + stockDetail := make([]map[string]interface{}, 0) + processingStarted := false + mutualFundName := "" + if len(rows) > 0 && len(rows[0]) > 1 { + mutualFundName = rows[0][1] + // log.Printf("Mutual Fund Name for sheet %s: %s", sheet, mutualFundName) + } + for _, row := range rows { + stock := make(map[string]interface{}, 0) + + if len(row) == 0 { + continue + } + if !headerFound { + for _, cell := range row { + if helpers.MatchHeader(cell, []string{`name\s*of\s*(the)?\s*instrument`}) { + headerFound = true + // Build the header map + for i, headerCell := range row { + normalizedHeader := helpers.NormalizeString(headerCell) + // Map possible variations to standard keys + switch { + case helpers.MatchHeader(normalizedHeader, []string{`name\s*of\s*(the)?\s*instrument`}): + headerMap["Name of the Instrument"] = i + case helpers.MatchHeader(normalizedHeader, []string{`isin`}): + headerMap["ISIN"] = i + case helpers.MatchHeader(normalizedHeader, []string{`rating\s*/\s*industry`, `industry\s*/\s*rating`}): + headerMap["Industry/Rating"] = i + case helpers.MatchHeader(normalizedHeader, []string{`quantity`}): + headerMap["Quantity"] = i + case helpers.MatchHeader(normalizedHeader, []string{`market\s*/\s*fair\s*value.*`, `market\s*value.*`}): + headerMap["Market/Fair Value"] = i + case helpers.MatchHeader(normalizedHeader, []string{`%.*nav`, `%.*net\s*assets`}): + headerMap["Percentage of AUM"] = i + } + } + // zap.L().Info("Header found", zap.Any("headerMap", headerMap)) + break + } + } + continue + } + + joinedRow := strings.Join(row, "") + if strings.Contains(strings.ToLower(joinedRow), "subtotal") || strings.Contains(strings.ToLower(joinedRow), "total") { + stopExtracting = true + break + } + if !processingStarted { + nameOfInstrument := "" + if idx, exists := headerMap["Name of the Instrument"]; exists && idx < len(row) { + nameOfInstrument = row[idx] + } + if strings.Contains(nameOfInstrument, "Equity & Equity related") { + processingStarted = true + continue // Skip the header description row and move to the next row + } + } + if processingStarted && !stopExtracting { + for key, idx := range headerMap { + if idx < len(row) { + // println(stockDetail["Name of the Instrument"].(string), "Name of the Instrument in stockDetail") + stock[key] = row[idx] + } else { + stock[key] = "" + } + } + _, ok := stock["Name of the Instrument"].(string) + if ok { + stockDetail = append(stockDetail, stock) + // println("Stock Detail: isin", stock["ISIN"].(string), "Name of the Instrument", stock["Name of the Instrument"].(string)) + } + } + } + // println(mutualFundName, "Mutual Fund Name") + hash := sha256.New() + hashName := sha256.New() + hashName.Write([]byte(mutualFundName)) + + hash.Write([]byte(mutualFundName + month)) + hashedId := hex.EncodeToString(hash.Sum(nil)) + hashedName := hex.EncodeToString(hashName.Sum(nil)) + collection := mongo_client.Client.Database(os.Getenv("DATABASE")).Collection(os.Getenv("MFHOLDING")) + + validStockDetails := []map[string]interface{}{} + + for _, stockDetail := range stockDetail { + if _, ok := stockDetail["Name of the Instrument"]; !ok { + // fmt.Println("Skipping entry: Missing Name of the Instrument") + continue + } + if _, ok := stockDetail["ISIN"]; !ok || stockDetail["ISIN"] == "" { + // fmt.Println("Skipping entry: Missing ISIN") + continue + } + if _, ok := stockDetail["Quantity"]; !ok { + // // fmt.Println("Skipping entry: Missing Quantity") + continue + } + if _, ok := stockDetail["Market/Fair Value"]; !ok { + // // fmt.Println("Skipping entry: Missing Market/Fair Value") + continue + } + if _, ok := stockDetail["Percentage of AUM"]; !ok { + // // fmt.Println("Skipping entry: Missing Percentage of AUM") + continue + } + validStockDetails = append(validStockDetails, stockDetail) + } + stockDetail = validStockDetails + + document := bson.M{ + "_id": hashedId, + "month": month, + "mutual_fund_name": mutualFundName, + "stock_details": stockDetail, + "hash": hashedName, + "created_at": time.Now(), + } + if mutualFundName == "" || stockDetail == nil || len(stockDetail) == 0 { + // fmt.Println("Skipping empty document") + continue + } + var existingDocument bson.M + err = collection.FindOne(context.TODO(), bson.M{"_id": hashedId}).Decode(&existingDocument) + if err == mongo.ErrNoDocuments { + // Document does not exist, so insert it + _, err := collection.InsertOne(context.TODO(), document) + if err != nil { + log.Fatal(err) + } + // fmt.Println("Document inserted with ID:", insertResult.InsertedID) + } else if err != nil { + log.Fatal(err) + } else { + // Document already exists + // // fmt.Println("Document already exists, skipping insertion.") + } + + } + return nil +} + +func safeGetRow(sheet *xls.WorkSheet, rowIndex int) (*xls.Row, bool) { + defer func() { + if r := recover(); r != nil { + // log.Printf("Recovered from panic when accessing row %d: %v", rowIndex, r) + } + }() + + // Attempt to access the row + row := sheet.Row(rowIndex) + if row == nil { + return nil, false + } + + return row, true +} + +// { +// "mutual_fund_name": "text", +// "stock_details.ISIN": "text", +// "stock_details.Name of the Instrument": "text" +// } + +func processXLSFile(tempInputFile, month string) error { + xlsFile, err := xls.Open(tempInputFile, "utf-8") + if err != nil { + log.Fatalf("Failed to open file: %v", err) + } + + for sheetIndex := 0; sheetIndex < xlsFile.NumSheets(); sheetIndex++ { + sheet := xlsFile.GetSheet(sheetIndex) + if sheet == nil { + // log.Printf("Sheet at index %d is nil. Skipping.", sheetIndex) + continue + } + + // log.Printf("Processing sheet: %s with MaxRow: %d", sheet.Name, sheet.MaxRow) + + headerFound := false + headerMap := make(map[string]int) + stopExtracting := false + processingStarted := false + stockDetail := make([]map[string]interface{}, 0) + + // Extract mutual fund name from the first row, second column + var mutualFundName string + if sheet.MaxRow > 0 { + firstRow, ok := safeGetRow(sheet, 0) + if !ok { + continue + } + if firstRow != nil && firstRow.LastCol() > 1 { + mutualFundName = firstRow.Col(1) + // log.Printf("Mutual Fund Name for sheet %s: %s", sheet.Name, mutualFundName) + } + } + + for rowIndex := 0; rowIndex < int(sheet.MaxRow); rowIndex++ { + if rowIndex >= int(sheet.MaxRow) { + // log.Printf("Row %d in sheet %s is nil or out of range. Skipping.", rowIndex, sheet.Name) + continue + } + // println("Row Index: ", rowIndex, int(sheet.MaxRow)) + row, ok := safeGetRow(sheet, rowIndex) + if !ok || row.LastCol() == 0 { + continue + } + + // Detect headers if not already found + if !headerFound { + for colIndex := 0; colIndex < min(row.LastCol(), 10); colIndex++ { // Limit columns for header detection + cellValue := strings.TrimSpace(strings.ToLower(row.Col(colIndex))) + if strings.Contains(cellValue, "name of the instrument") { + headerFound = true + // Build the header map + for i := 0; i < min(row.LastCol(), 10); i++ { // Limit column count to avoid extra empty columns + header := strings.TrimSpace(strings.ToLower(row.Col(i))) + switch { + case strings.Contains(header, "name of the instrument"): + headerMap["Name of the Instrument"] = i + case strings.Contains(header, "isin"): + headerMap["ISIN"] = i + case strings.Contains(header, "rating/industry") || strings.Contains(header, "industry/rating"): + headerMap["Industry/Rating"] = i + case strings.Contains(header, "quantity"): + headerMap["Quantity"] = i + case strings.Contains(header, "market/fair value") || strings.Contains(header, "market value"): + headerMap["Market/Fair Value"] = i + case strings.Contains(header, "% nav") || strings.Contains(header, "% to nav") || strings.Contains(header, "% net assets"): + headerMap["Percentage of AUM"] = i + } + } + // log.Printf("Header found: %v", headerMap) + break + } + } + continue + } + + // Stop extraction if "Subtotal" or "Total" is encountered + var joinedRow string + for colIndex := 0; colIndex < row.LastCol(); colIndex++ { + joinedRow += row.Col(colIndex) + } + + if strings.Contains(strings.ToLower(joinedRow), "subtotal") || strings.Contains(strings.ToLower(joinedRow), "total") { + stopExtracting = true + break + } + + // Start processing only after "Equity & Equity related" is encountered + if !processingStarted { + nameOfInstrument := "" + if idx, exists := headerMap["Name of the Instrument"]; exists && idx < row.LastCol() { + nameOfInstrument = row.Col(idx) + } + if strings.Contains(nameOfInstrument, "Equity & Equity related") { + processingStarted = true + continue // Skip the header description row and move to the next row + } + } + + // Check if we need to adjust column indices for this row + adjustColumns := false + if headerMap["ISIN"] < row.LastCol() { + isinValue := row.Col(headerMap["ISIN"]) + if !strings.HasPrefix(isinValue, "INE") { // Assuming valid ISINs start with "INE" + // log.Printf("Adjusting row %d columns by 1 due to misalignment", rowIndex) + adjustColumns = true + } + } + + // Data extraction based on header map with potential adjustment + if processingStarted && !stopExtracting { + stock := make(map[string]interface{}) + // log.Printf("Header map: %v", headerMap) + for key, colIndex := range headerMap { + adjustedColIndex := colIndex + if adjustColumns { + adjustedColIndex = colIndex + 1 + } + + if adjustedColIndex < row.LastCol() { + cellValue := row.Col(adjustedColIndex) + // log.Printf("Row %d, Key: %s, Expected Column: %d, Value: %s", rowIndex, key, adjustedColIndex, cellValue) + cleanedValue := strings.ReplaceAll(cellValue, ",", "") // Remove commas + + // Handle percentage values + if strings.HasSuffix(cleanedValue, "%") { + stock[key] = cleanedValue + } else if key == "Market/Fair Value" || key == "Quantity" { + // Force parsing Market/Fair Value and Quantity as floats + if parsedValue, err := strconv.ParseFloat(cleanedValue, 64); err == nil { + stock[key] = parsedValue + // log.Printf("Forced float parsing for key: %s, value: %v", key, parsedValue) + } else { + stock[key] = cellValue // Fallback to original if parsing fails + // log.Printf("Failed to parse as float for key: %s, value: %s", key, cellValue) + } + } else { + stock[key] = cellValue + } + } else { + stock[key] = "" + } + } + // println("*********************") + // Skip rows without meaningful data in "Name of the Instrument" + if stock["Name of the Instrument"] == "" { + continue + } + + // Append the stock detail to the list + stockDetail = append(stockDetail, stock) + } + } + // println(mutualFundName, "Mutual Fund Name") + hash := sha256.New() + hashName := sha256.New() + hashName.Write([]byte(mutualFundName)) + + hash.Write([]byte(mutualFundName + month)) + hashedId := hex.EncodeToString(hash.Sum(nil)) + hashedName := hex.EncodeToString(hashName.Sum(nil)) + collection := mongo_client.Client.Database(os.Getenv("DATABASE")).Collection(os.Getenv("MFHOLDING")) + + validStockDetails := []map[string]interface{}{} + + for _, stockDetail := range stockDetail { + if _, ok := stockDetail["Name of the Instrument"]; !ok { + // fmt.Println("Skipping entry: Missing Name of the Instrument") + continue + } + if _, ok := stockDetail["ISIN"]; !ok || stockDetail["ISIN"] == "" { + // fmt.Println("Skipping entry: Missing ISIN") + continue + } + if _, ok := stockDetail["Quantity"]; !ok { + // // fmt.Println("Skipping entry: Missing Quantity") + continue + } + if _, ok := stockDetail["Market/Fair Value"]; !ok { + // // fmt.Println("Skipping entry: Missing Market/Fair Value") + continue + } + if _, ok := stockDetail["Percentage of AUM"]; !ok { + // // fmt.Println("Skipping entry: Missing Percentage of AUM") + continue + } + validStockDetails = append(validStockDetails, stockDetail) + } + stockDetail = validStockDetails + + document := bson.M{ + "_id": hashedId, + "month": month, + "mutual_fund_name": mutualFundName, + "stock_details": stockDetail, + "hash": hashedName, + "created_at": time.Now(), + } + if mutualFundName == "" || stockDetail == nil || len(stockDetail) == 0 { + // fmt.Println("Skipping empty document") + continue + } + var existingDocument bson.M + err = collection.FindOne(context.TODO(), bson.M{"_id": hashedId}).Decode(&existingDocument) + if err == mongo.ErrNoDocuments { + // Document does not exist, so insert it + _, err := collection.InsertOne(context.TODO(), document) + if err != nil { + log.Fatal(err) + } + // // fmt.Println("Document inserted with ID:", insertResult.InsertedID) + } else if err != nil { + log.Fatal(err) + } else { + // Document already exists + // // fmt.Println("Document already exists, skipping insertion.") + } + + } + return nil +} + +// Helper function to limit column count to avoid misalignment issues +func min(a, b int) int { + if a < b { + return a + } + return b +} + func extractMonth(fileName string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`), // dd.mm.yyyy diff --git a/services/parse_service_test.go b/services/parse_service_test.go new file mode 100644 index 0000000..8c50a70 --- /dev/null +++ b/services/parse_service_test.go @@ -0,0 +1,60 @@ +package services + +import ( + "testing" +) + + +// Test generated using Keploy +func TestNormalizeWhitespace(t *testing.T) { + input := "This is a test" + expected := "This is a test" + result := normalizeWhitespace(input) + if result != expected { + t.Errorf("Expected %s but got %s", expected, result) + } +} + +// Test generated using Keploy +func TestRemoveZeroWidthChars(t *testing.T) { + input := "Hello\u200BWorld" + expected := "HelloWorld" + result := removeZeroWidthChars(input) + if result != expected { + t.Errorf("Expected %s but got %s", expected, result) + } +} + + +// Test generated using Keploy +func TestExtractMonth(t *testing.T) { + fileName := "report-03-2023.xlsx" + expected := "2023-03-01" + result := extractMonth(fileName) + if result != expected { + t.Errorf("Expected %s but got %s", expected, result) + } +} + + +// Test generated using Keploy +func TestExtractFileName(t *testing.T) { + url := "https://example.com/path/to/file.xlsx" + expected := "file" + result := extractFileName(url) + if result != expected { + t.Errorf("Expected %v but got %v", expected, result) + } +} + + +// Test generated using Keploy +func TestCleanHTMLContent(t *testing.T) { + input := "Hello\u200B World !" + expected := "Hello World !" + result := cleanHTMLContent(input) + if result != expected { + t.Errorf("Expected %v but got %v", expected, result) + } +} + diff --git a/utils/helpers/helpers_test.go b/utils/helpers/helpers_test.go index 7961d54..2794764 100644 --- a/utils/helpers/helpers_test.go +++ b/utils/helpers/helpers_test.go @@ -1,14 +1,19 @@ package helpers import ( - "reflect" - "stockbackend/types" - "strings" - "testing" - - "github.com/PuerkitoBio/goquery" - "go.mongodb.org/mongo-driver/bson/primitive" - "gopkg.in/mgo.v2/bson" + "reflect" + "stockbackend/types" + "strings" + "testing" + + "fmt" + "net/http" + "net/http/httptest" + "os" + + "github.com/PuerkitoBio/goquery" + "go.mongodb.org/mongo-driver/bson/primitive" + "gopkg.in/mgo.v2/bson" ) func TestMatchHeader_NonMatchingPattern(t *testing.T) { @@ -1274,3 +1279,272 @@ func TestCalculateOperatingEfficiencyScore_TotalAssetsMissing(t *testing.T) { t.Errorf("Expected %v got %v", expected, result) } } + +// Test generated using Keploy +func TestIncreaseInRoa_NotEnoughData(t *testing.T) { + netProfit := primitive.A{"1000"} + totalAssets := primitive.A{"5000"} + result := increaseInRoa(netProfit, totalAssets) + if result { + t.Errorf("Expected false, got %v", result) + } +} + +// Test generated using Keploy +func TestSafeToFloat_EmptyString(t *testing.T) { + input := "" + _, err := safeToFloat(input) + if err == nil { + t.Errorf("Expected error, got nil") + } +} + +// Test generated using Keploy +func TestFetchPeerData_Success(t *testing.T) { + // Create a mock server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Simulate a valid HTML response + fmt.Fprintln(w, `
+Peer Company | +1000 | +15.0 | +5000 | +2.0% | +100 | +5% | +200 | +10% | +15% | +|
Total | +1 | +1000 | +15.0 | +5000 | +2.0% | +100 | +5% | +200 | +10% | +15% | +