From aef8545daf4373e3875feb3ae786305f54c3165a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=B9i=20Nguy=E1=BB=85n=20T=E1=BA=A5n=20Sang?= Date: Fri, 9 Feb 2024 16:07:07 +0700 Subject: [PATCH] Patch 1 --- account/__pycache__/database.cpython-312.pyc | Bin 903 -> 903 bytes account/__pycache__/loader.cpython-312.pyc | Bin 526 -> 526 bytes account/main.py | 1 - atmt.py | 71 +++++++++++++++++++ database/censorship.db | Bin 28672 -> 0 bytes database/search-index.db | Bin 49152 -> 0 bytes main.py | 12 +++- manager/__pycache__/edit.cpython-312.pyc | Bin 4725 -> 3957 bytes manager/__pycache__/insert.cpython-312.pyc | Bin 5157 -> 4389 bytes manager/edit.py | 16 +---- manager/insert.py | 16 +---- requirements.txt | 1 - search/__pycache__/index.cpython-312.pyc | Bin 1093 -> 748 bytes search/index.py | 8 +-- 14 files changed, 84 insertions(+), 41 deletions(-) create mode 100644 atmt.py delete mode 100644 database/censorship.db delete mode 100644 database/search-index.db diff --git a/account/__pycache__/database.cpython-312.pyc b/account/__pycache__/database.cpython-312.pyc index ed7b05e4e241d5428251c23239f9fbf4167cb6d2..83716477805787322f976cbf4923ccc897421757 100644 GIT binary patch delta 20 acmZo?Z)fK|&CAQh00avjAKl1Z!wdj4@dcUy delta 20 acmZo?Z)fK|&CAQh00bXGk8I?wVFmy*$OT&f diff --git a/account/__pycache__/loader.cpython-312.pyc b/account/__pycache__/loader.cpython-312.pyc index b45d892f4ff7cfbf1888ea8af49f6461efc84fb0..d6dba8a0af591fbea466cf0cc15c5b7d9f3f19ea 100644 GIT binary patch delta 20 acmeBU>0{wO&CAQh00fI4AKl2!%me^5u>~st delta 20 acmeBU>0{wO&CAQh00cn>M>cXZGXVfDLj)!O diff --git a/account/main.py b/account/main.py index 6939d98..6b46111 100644 --- a/account/main.py +++ b/account/main.py @@ -1,4 +1,3 @@ -import smtplib import time import streamlit as st from account.loader import account_database_loader diff --git a/atmt.py b/atmt.py new file mode 100644 index 0000000..667b57e --- /dev/null +++ b/atmt.py @@ -0,0 +1,71 @@ +import requests +from bs4 import BeautifulSoup +from initializer.loader import database_loader +from manager.insert import insert_data + +conn = database_loader() + +def summarize_text(text, max_length=100): + if len(text) <= max_length: + return text + else: + last_space_index = text.rfind(' ', 0, max_length) + return text[:last_space_index] + '...' + +def get_website_info(url): + try: + response = requests.get(url) + + if response.status_code == 200: + soup = BeautifulSoup(response.content, 'html.parser') + + title = soup.title.string.strip() + + text_content = '' + for paragraph in soup.find_all(['p', 'div']): + text_content += paragraph.get_text().strip() + '\n' + + meta_description = soup.find('meta', attrs={'name': 'description'}) + description = meta_description['content'] if meta_description else '' + + meta_keywords = soup.find('meta', attrs={'name': 'keywords'}) + keywords = meta_keywords['content'] if meta_keywords else '' + + return { + "title": title, + "text_content": text_content, + "description": description, + "keywords": keywords + } + else: + return None + except Exception as e: + print("Error:", e) + return None + +user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107 Safari/537.36' +headers = {'User-Agent': user_agent} + +random_keyword = ' '.join(['Google', 'English']) +search_url = f"https://www.google.com/search?q={random_keyword}&hl=en" + +response = requests.get(search_url, headers=headers) + +if response.status_code == 200: + soup = BeautifulSoup(response.text, 'html.parser') + + search_results = soup.find_all('a') + + random_urls = [link.get('href') for link in search_results if link.get('href') and link.get('href').startswith('http')] + + for url in random_urls: + print("url: ", url) + website_info = get_website_info(url) + if website_info is None: + pass + else: + print("title: ", website_info["title"]) + insert_data(conn, url, website_info["title"], website_info["text_content"], website_info["description"], website_info["keywords"], summarize_text(website_info["text_content"])) + print("---PASS---") +else: + print("ERR.") \ No newline at end of file diff --git a/database/censorship.db b/database/censorship.db deleted file mode 100644 index 977fe0d6fc9d8ae9063752af91caf11e2c243f2b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 28672 zcmeI#!A{#S9LI4RN-9;f;+E4iOVKoh6SXkwbXJ#LVJ}R z_6U11&e+?M1p?u=L-e&&RgVAtHU8L_|Ka|wHmUkLnO=;IdMDO|l;XWoLI`O;ZTrcG zo44|b{dV)`h7=pwudc1~R&Pb`NAIZD>Hh5Q+X({!1Q0*~0R#|0009IL_%8xqJkNg} z$hppvbfJ^QXm&c88j~)J&i=GQdXlxb4x)Y>sqMWF(NNv1QlIuzsZy;MzYmXAJpWZ7 zXGQc`dNrR+6Z@b@_A1GFQ6)HE2uCf?e-p^-A~#8ACsTcC?32GA*-`tJLr_)i7jP;pqdq+C4ryEBfql35V(Xw-$ovAn)#!sx9-)~)4;EZb4 zOD9lQmS2Oqn&J-D6^)Z5O;o%+hz{fa;B)ADbKCSY5I_I{1Q0*~0R#|0009ILK%l__ z9k)07{|28hTSEW=1Q0*~0R#|0009ILKtKrke{KN+2q1s}0tg_000IagfB*u`7oh)d z{u#4F1Q0*~0R#|0009ILKmY**=>NF`2q1s}0tg_000IagfB*srG+%)Jzxij(4iP{A S0R#|0009ILKmY**5V!+Bk@F$| diff --git a/database/search-index.db b/database/search-index.db deleted file mode 100644 index 741be249d0ab6379b016fb8f678874aca1b38177..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49152 zcmeI*-%i_B90zb8+i~I~VAcs)Q}^dc*#sU5n51kpy;(d`mW@AcFchv3B!*f*jAE;> zYT6doC)mxNU>AFYN!#t-V=vHMw3?>&rsw2e-Tb@g6@#ypgPr4_Pk!g)lL%wSY}{XQ z1BV`X^^?7T=7b4>5aA0-1wl|*NwU(J__Ex2!saBjJZw=FW^2EXv9(F{XJP!;@vZUr z*l%O^lm8}{5`QIf@n7OM*a}P#fB*y_009U<;93iOpN#6aZrZmspJnd{Yd z4+4L;;?)k^!@;%5FU!S(U8HuwS}D@0!9i2gNAzB~v|1?d&~J)6Y5K^Z4@&k|>zg($ zuRka)HN-?zzjKE)ZVpVK>Q#LAabI5T>cr?2+EG)}Zk3kS?Ba5<{AM!#V`{CeTSnu( zcq@t0z$BPL;M9TvJ>=E#(Mhy79-c(SOw-CdD_K@@tlVN{ftAl#`P^tss8KzaBaP*O zxw+Nz-n>|qIyxs7F*WTwhi)}Z1E+qHrl*HBFCE4SuQq!2*>|z1zOXQKm+b|6y(v{$ zU2JqtJY+c1{;^j%YVWMMXjGq>iCqjFvOTZde$A~?+1@Oyw8v^sOq}*kn;WIIWqQ}% zSeW+N@yafDI(BPEOv??99gcP$q-oXhD|PoYVDFYRJ#xN1^XgSUP5p;nJ!nPjRjW?b z*ijUHDn-uRTGcsc_PgDPP$toKs-?B1;x>IFimtEGP$Zr1Zf|Tw6#bJF@w-iT(g?*X zeJyu_`Wo)6Fm`1{pGlD)y6uMg8frSy*K{bZul3M6<7-LLKS+^E_XRjusAajYWggJi zB(F49MMeK8McC(5+qb!wLXAoNjq&~1-x{wqF1ATjPo>C{v)*@Hs7bNiWb4x}w4#4B zvC-8TMxBqLG#wbm#~8JX+xC$4&3=sx;6@`F2xqY{G zI9u^fEH<|+{*$h{mAdD92Z5C}v$ry4He)W*?0nud^SQ-1>le}f6W9Y21Rwwb2tWV= z5P$##AOHafKmY<)Odu*vs{F45t>gcfg7)%?br9Ww00bZa0SG_<0uX=z1Rwwb2ta@f z$daa7IR3|AfB*y_009U<00Izz00bZa0SH`afza{)Q$c%rrTdD`K>z{}fB*y_009U< z00Izz00gd)z&n!Byz2k;Jppk3{~GOnbQJ;+fB*y_009U<00Izz00ge1K=b@RzyH6a z-4(RIwSU+Q69gat0SG_<0uX=z1Rwwb2tWV=Lj@Y|KKYSNL>4YSWkEklrfCv|L=}-_ z`CJmoBA?5I%o9;0GQSf)uVBmbd>$cUt{D*}WS`t1Yy(*&iYQ9#9)0XNk@#j2kx1;` z{%ivxlSuQH|9n{_>PhDw|Ll20lGRvDNyOCgiFh(eB;iB8J&#kgxFmecyTP9FLR#nl zUkKWZp`F7D2tWV=5P$##AOHafKmY;|fB*#kw*nDaO%jox|0miX0()SB00bZa0SG_< z0uX=z1Rwwb2teQx3Rvvyy_%WSG(*cZF9yiZ&F!=61!mbb0{nmbSr-5K`n&!Xzv>`k zW;5m@&CcgdGoM>*9sfTQv}c#FgGdPi5P$##AOHafKmY;|fB*y_0D-p^NU(1Nyx;jk wK#6RCuRHYNr_FnThnD;;#}vP)(LfuzL0 zz(X@<4<;nW=*9G6JVJ{Hub%dze*?(HlQSy?<4b1d_kQmyZ{E8oUgY#|0|Sb{yD0FObUIe0kH`bqSUj&QnEPPwAVUOQ1_?aNouR zqd2qZ?U(YLxN0znvOFrZV;wl2RYzl73nPekJQqV$WGe1wS3VFF<7UIgwO&$eZ9z24 z*cAO0ew#gKNZ1VX`%*OaitL8Hs3{$}4{BqGqq{Ub)?1NiH6s5F&^N?%JuQ6P|k z%RqsVI(3vnniu^~nrb6KkCa)U)9*^DFx)dYuae?HlJ2pMEX}GDV1$>TPt*`7(C6wr z7~{ImN?tn_X-Ay{Q-Nbti>bWzKpIIpt(oezXNIn8cV=%~*~fe6`Yj45f2jYHJ=6Z8 zeFf=3#HZtnuD9ohF+p3z-iBzLzR@2f`B(a(Jqx+E`z)9!(x_z24C{db+XU9^I^vV_ zWnvQtqT6)UD38u?M#A3i4w~msBXFPe#^eKtRQk~{hk^k{VBIFQxMYXDL-N#jYtc z+$|)kNx8c)ST*ERMP~k|hRm4Lq{wkvuI57%F+akZhSs)Y6ST-`qh)^a5A?;Scmc6> z5d}0!PhykzN}P>bj^pjYI$L43$=GJA8LV-zctH#A1$2`q;qCnnFK&DFT?gJp51GoB Y443Itd_FD%^Gr{J==rT2NCW|Y0CV-fF8}}l delta 1534 zcmZ`(O-vg{6yEWAy}SO!I0kHtvB53a65y1eCWOC|fTA?2^3zHchguH1DeD;9njK0& zMy67v=FkIl4&e}0<`mMX5*7ET4FRI|&^U<%T2)mJx#ZL;C>NxveY3?trS!==^XB`$ zd2i;;jNgRc#N5BQTy_p@;O4s8=Uj9*pc{*Ax?OSJhv zSZNk;7?|rnlrV3fc}r%-%voTT^S?9LNa3!(4=ED=!}2EEShOfs;sJS=8R6~X>IPZ5 zS(lm8LKk4oicqC=S+Nx@uvY;h3RTe+E-rrbiWPE}@9pY(^*7ia>fJ+Kd#E!mV^)65 zDrHy;X)@cNZAZA`2ruc+9jz<;YHy{z-xOD*z%Ba=hyRJy zzYELde)uDTdT>2>I1G&;jOV9vL}!;I0)~SObdTqAP)Gvx)O4cbRsch5Ut5mz-R8^= zCK3zjnQT6-;1g^lVO1|c-BsKJL!BKCi^n3;KLx)q1VAgb+m*1a zfg{Qw{a8NjXg}Z+!23OsIX5{>8(e!JZ=p~6NP6Cf1 zwSSNXQgN8xcLsvy`O2!Ik(81qY5W;z`{2{Z=kY&P-Cj|ii2F8#3cVgfqqr9wP zPtUxKc!+sy@%K0!Vm{m0p42y)SEjP+09Ihg{y_9$n)GznJ5k;1kbnZO-Swzu_q{Ok HnX~=@NQYM2 diff --git a/manager/__pycache__/insert.cpython-312.pyc b/manager/__pycache__/insert.cpython-312.pyc index 27de49c3468d416bfbe0a9f57185b09ffa65a159..8561a9dccaf86740eb5b0b11673e1c5775873d7f 100644 GIT binary patch delta 888 zcmY*X&1(}u6yM2ayJ_|#*=*A`CH+W0Od7S@YM>NtMZu&OLFz#~l{MMQ#?5Y+nUWS# zdeXCHt|AB)5s&fUQM4w4|G^3>c&Y~v7Cd-zX6xGeVBY-R@4YuOZ{FK4>`$s66(!D5 z`SSX`^+DQGhXLHnqv$r6mG_>lb*7`^VAzj3wtF8+G|U&O-lmIDl;Q~z=hxC|)pN1s z;&p4wMi{{q^{AaKVS&Gqpb%zRme@469nY-6DQ3l>g_@p=EI3U~)JN4sAW&oOgpJn2 zLy@^*L4&es@+UKut4e~;u&5PRylE2^ZRL?mf3J@443KM z@ETJEs(k(~DnU9!JO_&e>BZGNV^gN%crB|&=bDZ;vgy1AvmH!hP{Ioi&a*>Ufxlyy j8(wYGu~y)1YO)DH%jA=OF(rV^Pjv_+k1iB}9wht$6MVk# delta 1621 zcmZuxO-vg{6yDkO+Us5aV%cDD4F(jjDHxhWB_u#fDnLX^{s>~rGZ4XVurH39`5#h8wG)^LcRz097hvw2s;l`z%S=g9LJbC85 z`QG>D=e_5>-Fmyz_q*3CGDJF-H`ROI6<-6mzS3ccif5U*5}XG~@5j(2mGyQ#=34&=APkKf9E zFFkI-YaPjEgLX_!E4heGF*-jS*>@<9B<|C@iuAKDEz zZ3kPc!PfG7)nMw*mF@1KD)|M6er~KZ|I%CyzPogB#})jn^l53e{gEqP9#|h<8y~)V z^-){@j!<`V@~aDFaoxY>e;n(rMtZA4?+tNB3U5pCsuW)}o=9zXxVr<@&OwV@6{0u9 zT`By~5#A%$Wq$al1)PTU#DZ}$nfzpaR!0VHnie9kk%GR-yiO8{NIo?K8L*9jv5ha; zKJINRvYV1z1$jP~mlb%5t^}wW1(J6ao+e9!7KiR<@95WzjJ>HLPpR-cA+%wSbB%>D z{Jk>@8t|{q$|xdjb09@vH_4*pW@@dM4cLsDE=n)Co}wok!auoVJ`}bI1d8ClT?r7y z0U->U@ChLeBKUow1GM2!gnFsjW@g)QSx6*0>~(EpBZ7{yo(d(9q{n!Uz$yH>yASjc zxGPN?#FA{xwraYafz6A7BatmQBA%6AwHetS{Gk{#8 zc-h+x;`p97fxcmLXViq@Gl3{5U;)z1K>T?N!^D^^EWr$#ER(sI(wQ{bCbu(bD{8XbVoA==%e%#%omyF* zUzAb=(r}BVD8Ib87{r|Xm?@5n56A#o?kyp}82*{bvkiw9{n8Ogon8MV;5XDrqlNfIcvnh~OwkpU{of>o3iL(~Om z0voDm2`9uX28Jvi7z;tCup{j1sbpcOWYFZ8EW?;C_7dcTmmqh)WCIda%(}X|n(UKX z8MO^>apWczWvAqq=V`LuVoA==%e%#%omyF*UzAeB4pe)Kr6|9=_!cvW1bJ3r@V?HWF|cyVcR1hS;p_M4^q3GlLvUi)6&@LK_}-n~Ggz2KczG$)vib!$OU=0 Xm={QVU}j`wyv-o?nMHt69;_Y!L&0xe diff --git a/search/index.py b/search/index.py index 8ebef27..7a2271f 100644 --- a/search/index.py +++ b/search/index.py @@ -1,3 +1,4 @@ +import pandas as pd import streamlit as st def Search_Data(conn, keyword): @@ -11,9 +12,4 @@ def Search_Data(conn, keyword): if len(rows) == 0: st.write("No results found") else: - for row in rows: - st.write(row[0]) - st.write(row[1]) - st.write(row[2]) - st.write(row[6]) - st.markdown("---") + return rows