
Commit

Patch 1
EndermanPC committed Feb 9, 2024
1 parent 0b1c87e commit aef8545
Showing 14 changed files with 84 additions and 41 deletions.
Binary file modified account/__pycache__/database.cpython-312.pyc
Binary file modified account/__pycache__/loader.cpython-312.pyc
1 change: 0 additions & 1 deletion account/main.py
@@ -1,4 +1,3 @@
import smtplib
import time
import streamlit as st
from account.loader import account_database_loader
71 changes: 71 additions & 0 deletions atmt.py
@@ -0,0 +1,71 @@
import requests
from bs4 import BeautifulSoup
from initializer.loader import database_loader
from manager.insert import insert_data

conn = database_loader()

def summarize_text(text, max_length=100):
    # Truncate at the last space before max_length, e.g.
    # summarize_text("lorem ipsum dolor", 12) -> "lorem ipsum...".
    if len(text) <= max_length:
        return text
    else:
        last_space_index = text.rfind(' ', 0, max_length)
        return text[:last_space_index] + '...'

def get_website_info(url):
    try:
        response = requests.get(url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')

            title = soup.title.string.strip()

            # Collect the visible text of every <p> and <div> element.
            text_content = ''
            for paragraph in soup.find_all(['p', 'div']):
                text_content += paragraph.get_text().strip() + '\n'

            # Meta description and keywords; empty strings when the tags are missing.
            meta_description = soup.find('meta', attrs={'name': 'description'})
            description = meta_description['content'] if meta_description else ''

            meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
            keywords = meta_keywords['content'] if meta_keywords else ''

            return {
                "title": title,
                "text_content": text_content,
                "description": description,
                "keywords": keywords
            }
        else:
            return None
    except Exception as e:
        print("Error:", e)
        return None

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107 Safari/537.36'
headers = {'User-Agent': user_agent}

random_keyword = ' '.join(['Google', 'English'])
search_url = f"https://www.google.com/search?q={random_keyword}&hl=en"

response = requests.get(search_url, headers=headers)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    search_results = soup.find_all('a')

    # Keep only absolute links from the search-result page.
    random_urls = [link.get('href') for link in search_results if link.get('href') and link.get('href').startswith('http')]

    for url in random_urls:
        print("url: ", url)
        website_info = get_website_info(url)
        if website_info is None:
            pass
        else:
            print("title: ", website_info["title"])
            insert_data(conn, url, website_info["title"], website_info["text_content"], website_info["description"], website_info["keywords"], summarize_text(website_info["text_content"]))
            print("---PASS---")
else:
    print("ERR.")
Binary file removed database/censorship.db
Binary file removed database/search-index.db
12 changes: 9 additions & 3 deletions main.py
@@ -3,14 +3,15 @@
from initializer.loader import database_loader
from manager.manager import *
from search.index import Search_Data
-from streamlit_searchbox import st_searchbox

conn = database_loader()

st.title('MonoSearch')

st.session_state.setdefault('form_state', True)

+Search_Result = []

with st.form('Input_Form'):
    col1, col2, col3, col4, col5 = st.columns([3, 0.8, 0.6, 0.6, 0.8])
    AForm = st.session_state.form_state
@@ -31,7 +32,7 @@
    submitted4 = st.form_submit_button('Remove')

if keyword and submitted1:
-    Search_Data(conn, keyword)
+    Search_Result = Search_Data(conn, keyword)

if submitted2 and AForm == True:
    username = st.text_input('Username: ')
@@ -85,4 +86,9 @@
    st.session_state.add_state = False
elif submitted4 and not AForm:
    st.session_state.add_state = True


+for row in Search_Result:
+    st.markdown('```' + str(row[0]) + '``` ```' + row[1] + '```')
+    st.markdown("### [" + row[2] + ']' + '(' + row[1] + ')')
+    st.write(row[6])
+    st.markdown("&nbsp;&nbsp;&nbsp;")
Binary file modified manager/__pycache__/edit.cpython-312.pyc
Binary file modified manager/__pycache__/insert.cpython-312.pyc
16 changes: 1 addition & 15 deletions manager/edit.py
@@ -11,20 +11,6 @@

allowed_extensions = {"http", "https"}

-def normalize(link):
-    parsed_url = urlparse(link)
-
-    if (splitext(parsed_url.path)[1][1:] not in allowed_extensions) and parsed_url.path:
-        return None
-
-    final_link = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path
-    if parsed_url.port != None and parsed_url.port != -1:
-        final_link += ":" + str(parsed_url.port)
-    if not final_link.endswith("/") and "." not in final_link:
-        final_link += "/"
-
-    return final_link

def content_exists(conn, link):
    with conn:
        cursor = conn.cursor()
@@ -60,7 +46,7 @@ def is_content_safe(link):
def edit_data(conn, site_id, link, title, text, description, keywords, shorttext):
    added = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

-    normalize_link = normalize(link)
+    normalize_link = link

    try:
        response = requests.get(normalize_link)
16 changes: 1 addition & 15 deletions manager/insert.py
@@ -11,20 +11,6 @@

allowed_extensions = {"http", "https"}

-def normalize(link):
-    parsed_url = urlparse(link)
-
-    if (splitext(parsed_url.path)[1][1:] not in allowed_extensions) and parsed_url.path:
-        return None
-
-    final_link = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path
-    if parsed_url.port != None and parsed_url.port != -1:
-        final_link += ":" + str(parsed_url.port)
-    if not final_link.endswith("/") and "." not in final_link:
-        final_link += "/"
-
-    return final_link

def content_exists(conn, link):
    with conn:
        cursor = conn.cursor()
@@ -68,7 +54,7 @@ def insert_data(conn, link, title, text, description, keywords, shorttext):
    else:
        site_id = max_site_id + 1

-    normalize_link = normalize(link)
+    normalize_link = link

    try:
        response = requests.get(normalize_link)
1 change: 0 additions & 1 deletion requirements.txt
@@ -1,4 +1,3 @@
-streamlit-searchbox
streamlit
requests
bs4
Binary file modified search/__pycache__/index.cpython-312.pyc
8 changes: 2 additions & 6 deletions search/index.py
@@ -1,3 +1,4 @@
+import pandas as pd
import streamlit as st

def Search_Data(conn, keyword):
@@ -11,9 +12,4 @@ def Search_Data(conn, keyword):
    if len(rows) == 0:
        st.write("No results found")
    else:
-        for row in rows:
-            st.write(row[0])
-            st.write(row[1])
-            st.write(row[2])
-            st.write(row[6])
-            st.markdown("---")
+        return rows
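
With this change Search_Data no longer renders anything itself; it hands the raw rows back to the caller, and it implicitly returns None when there are no matches because the return sits in the else branch. A minimal sketch of the new calling pattern, assuming conn is the connection produced by database_loader and the keyword is arbitrary:

from initializer.loader import database_loader
from search.index import Search_Data

conn = database_loader()
rows = Search_Data(conn, "example keyword")  # hypothetical query
for row in rows or []:  # guard against the None returned for empty results
    print(row)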
