
Commit

Patch 1
EndermanPC committed Feb 9, 2024
1 parent 0b1c87e commit aef8545
Showing 14 changed files with 84 additions and 41 deletions.
Binary file modified account/__pycache__/database.cpython-312.pyc
Binary file modified account/__pycache__/loader.cpython-312.pyc
1 change: 0 additions & 1 deletion account/main.py
@@ -1,4 +1,3 @@
import smtplib
import time
import streamlit as st
from account.loader import account_database_loader
71 changes: 71 additions & 0 deletions atmt.py
@@ -0,0 +1,71 @@
import requests
from bs4 import BeautifulSoup
from initializer.loader import database_loader
from manager.insert import insert_data

conn = database_loader()

def summarize_text(text, max_length=100):
    # Truncate at the last space before max_length, e.g.
    # summarize_text("lorem ipsum dolor", 12) -> "lorem ipsum...".
    if len(text) <= max_length:
        return text
    else:
        last_space_index = text.rfind(' ', 0, max_length)
        return text[:last_space_index] + '...'

def get_website_info(url):
    try:
        response = requests.get(url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')

            title = soup.title.string.strip()

            # Collect the visible text of every <p> and <div> element.
            text_content = ''
            for paragraph in soup.find_all(['p', 'div']):
                text_content += paragraph.get_text().strip() + '\n'

            # Meta description and keywords; empty strings when the tags are missing.
            meta_description = soup.find('meta', attrs={'name': 'description'})
            description = meta_description['content'] if meta_description else ''

            meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
            keywords = meta_keywords['content'] if meta_keywords else ''

            return {
                "title": title,
                "text_content": text_content,
                "description": description,
                "keywords": keywords
            }
        else:
            return None
    except Exception as e:
        print("Error:", e)
        return None

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107 Safari/537.36'
headers = {'User-Agent': user_agent}

random_keyword = ' '.join(['Google', 'English'])
search_url = f"https://www.google.com/search?q={random_keyword}&hl=en"

response = requests.get(search_url, headers=headers)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    search_results = soup.find_all('a')

    # Keep only absolute links from the search-result page.
    random_urls = [link.get('href') for link in search_results if link.get('href') and link.get('href').startswith('http')]

    for url in random_urls:
        print("url: ", url)
        website_info = get_website_info(url)
        if website_info is None:
            pass
        else:
            print("title: ", website_info["title"])
            insert_data(conn, url, website_info["title"], website_info["text_content"], website_info["description"], website_info["keywords"], summarize_text(website_info["text_content"]))
            print("---PASS---")
else:
    print("ERR.")
Binary file removed database/censorship.db
Binary file removed database/search-index.db
12 changes: 9 additions & 3 deletions main.py
@@ -3,14 +3,15 @@
from initializer.loader import database_loader
from manager.manager import *
from search.index import Search_Data
-from streamlit_searchbox import st_searchbox

conn = database_loader()

st.title('MonoSearch')

st.session_state.setdefault('form_state', True)

+Search_Result = []

with st.form('Input_Form'):
    col1, col2, col3, col4, col5 = st.columns([3, 0.8, 0.6, 0.6, 0.8])
    AForm = st.session_state.form_state
@@ -31,7 +32,7 @@
    submitted4 = st.form_submit_button('Remove')

if keyword and submitted1:
-    Search_Data(conn, keyword)
+    Search_Result = Search_Data(conn, keyword)

if submitted2 and AForm == True:
    username = st.text_input('Username: ')
@@ -85,4 +86,9 @@
    st.session_state.add_state = False
elif submitted4 and not AForm:
    st.session_state.add_state = True


+for row in Search_Result:
+    st.markdown('```' + str(row[0]) + '``` ```' + row[1] + '```')
+    st.markdown("### [" + row[2] + ']' + '(' + row[1] + ')')
+    st.write(row[6])
+    st.markdown("&nbsp;&nbsp;&nbsp;")
Binary file modified manager/__pycache__/edit.cpython-312.pyc
Binary file modified manager/__pycache__/insert.cpython-312.pyc
16 changes: 1 addition & 15 deletions manager/edit.py
@@ -11,20 +11,6 @@

allowed_extensions = {"http", "https"}

-def normalize(link):
-    parsed_url = urlparse(link)
-
-    if (splitext(parsed_url.path)[1][1:] not in allowed_extensions) and parsed_url.path:
-        return None
-
-    final_link = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path
-    if parsed_url.port != None and parsed_url.port != -1:
-        final_link += ":" + str(parsed_url.port)
-    if not final_link.endswith("/") and "." not in final_link:
-        final_link += "/"
-
-    return final_link

def content_exists(conn, link):
    with conn:
        cursor = conn.cursor()
@@ -60,7 +46,7 @@ def is_content_safe(link):
def edit_data(conn, site_id, link, title, text, description, keywords, shorttext):
    added = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

-    normalize_link = normalize(link)
+    normalize_link = link

    try:
        response = requests.get(normalize_link)
16 changes: 1 addition & 15 deletions manager/insert.py
@@ -11,20 +11,6 @@

allowed_extensions = {"http", "https"}

-def normalize(link):
-    parsed_url = urlparse(link)
-
-    if (splitext(parsed_url.path)[1][1:] not in allowed_extensions) and parsed_url.path:
-        return None
-
-    final_link = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path
-    if parsed_url.port != None and parsed_url.port != -1:
-        final_link += ":" + str(parsed_url.port)
-    if not final_link.endswith("/") and "." not in final_link:
-        final_link += "/"
-
-    return final_link

def content_exists(conn, link):
    with conn:
        cursor = conn.cursor()
@@ -68,7 +54,7 @@ def insert_data(conn, link, title, text, description, keywords, shorttext):
    else:
        site_id = max_site_id + 1

-    normalize_link = normalize(link)
+    normalize_link = link

    try:
        response = requests.get(normalize_link)
1 change: 0 additions & 1 deletion requirements.txt
@@ -1,4 +1,3 @@
-streamlit-searchbox
streamlit
requests
bs4
Binary file modified search/__pycache__/index.cpython-312.pyc
8 changes: 2 additions & 6 deletions search/index.py
@@ -1,3 +1,4 @@
+import pandas as pd
import streamlit as st

def Search_Data(conn, keyword):
@@ -11,9 +12,4 @@ def Search_Data(conn, keyword):
    if len(rows) == 0:
        st.write("No results found")
    else:
-        for row in rows:
-            st.write(row[0])
-            st.write(row[1])
-            st.write(row[2])
-            st.write(row[6])
-            st.markdown("---")
+        return rows
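
With this change Search_Data no longer renders anything itself; it hands the raw rows back to the caller, and it implicitly returns None when there are no matches because the return sits in the else branch. A minimal sketch of the new calling pattern, assuming conn is the connection produced by database_loader and the keyword is arbitrary:

from initializer.loader import database_loader
from search.index import Search_Data

conn = database_loader()
rows = Search_Data(conn, "example keyword")  # hypothetical query
for row in rows or []:  # guard against the None returned for empty results
    print(row)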
