api/api.py

#!/bin/env python3

import time
import json
import random
import os
import sys
import requests
import collections
import urllib.request
from bs4 import BeautifulSoup

collections.Callable = collections.abc.Callable

nu = "\033[0m"
re = "\033[1;31m"
gr = "\033[1;32m"
cy = "\033[1;36m"

raw_tags = []
tag_lis = []

useragent = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393"
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4"
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14",
    "Mozilla/5.0 (Linux; U; Android-4.0.3; en-us; Galaxy Nexus Build/IML74K) AppleWebKit/535.7 (KHTML, like Gecko) CrMo/16.0.912.75 Mobile Safari/535.7",
]


class extra:
    def tiny_url(url):
        apiurl = "http://tinyurl.com/api-create.php?url="
        tinyurl = urllib.request.urlopen(apiurl + url).read()
        return tinyurl.decode("utf-8")

    def write(in_text):
        for char in in_text:
            time.sleep(0.1)
            sys.stdout.write(char)
            sys.stdout.flush()

    def extract_hash_tags(stri):
        return [part[1:] for part in stri.split() if part.startswith("#")]

    def banner():
        print(
            f"""{cy}
  ╔═╗ ╔═╗     ╦  ╔═╗
  ╚═╗ ║ ║     ║  ║ ╦
  ╚═╝ ╚═╝  {gr}o{cy}  ╩  ╚═╝
 
        {gr}Coded By :
  {gr}Yezz123{nu}
	            """
        )


class main:
    def __init__(self, user):
        self.user = user
        self.get_profile()

    def get_profile(self):
        extra.write(f"\n{gr}[+]{nu} getting profile ...")
        profile = requests.get(
            f"https://www.instagram.com/{self.user}",
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
            },
        )
        soup = BeautifulSoup(profile.text, "html.parser")
        more_data = soup.find_all("script", attrs={"type": "text/javascript"})
        self.data = json.loads(more_data[3].get_text()[21:].strip(";"))
        self.p_data = self.data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
        self.output = {
            "username         ": str(self.p_data["username"]),
            "name             ": str(self.p_data["full_name"]),
            "url              ": str(f"instagram.com/{self.p_data['username']}"),
            "followers        ": str(self.p_data["edge_followed_by"]["count"]),
            "following        ": str(self.p_data["edge_follow"]["count"]),
            "posts            ": str(
                self.p_data["edge_owner_to_timeline_media"]["count"]
            ),
            "bio              ": str(self.p_data["biography"].replace("\n", ", ")),
            "external url     ": str(self.p_data["external_url"]),
            "private          ": str(self.p_data["is_private"]),
            "verified         ": str(self.p_data["is_verified"]),
            "profile pic url  ": extra.tiny_url(str(self.p_data["profile_pic_url_hd"])),
            "business account ": str(self.p_data["is_business_account"]),
            "connected to fb  ": str(self.p_data["connected_fb_page"]),
            "joined recently  ": str(self.p_data["is_joined_recently"]),
            "business category": str(self.p_data["business_category_name"]),
        }

        if str(self.p_data["is_private"]).lower() == "true":
            print(f"{re}[!]{gr} private profile can't scrap data !\n")
            return 1
        else:
            for index, post in enumerate(
                self.p_data["edge_owner_to_timeline_media"]["edges"]
            ):
                try:
                    raw_tags.append(
                        extra.extract_hash_tags(
                            post["node"]["edge_media_to_caption"]["edges"][0]["node"][
                                "text"
                            ]
                        )
                    )
                except IndexError:
                    pass
            x = len(raw_tags)
            for i in range(x):
                tag_lis.extend(raw_tags[i])
            self.tags = dict(collections.Counter(tag_lis))

        return self.tags

    def print_data_(self):
        self._extracted_from_print_data_2()
        print(f"{gr}[+]{nu} most used user tags : \n")
        o = 0
        for key, value in collections.Counter(self.tags).most_common():
            print(f"{gr}{key} : {nu}{value}")
            o += 1
            if o == 5:
                break
        print("")

    def print_data(self):
        self._extracted_from_print_data_2()

    def _extracted_from_print_data_2(self):
        os.system("clear")
        extra.banner()
        for key, value in self.output.items():
            print(f"{gr}{key} : {nu}{value}")
        print("")

    def make_dir(self):
        try:
            os.mkdir(self.user)
            os.chdir(self.user)
        except FileExistsError:
            os.chdir(self.user)

    def scrap_uploads(self):
        if self.output["private          "].lower() == "true":
            print(f"{re}[!]{gr} private profile can't scrap data !\n")
            return 1
        else:
            posts = {}
            print(f"{gr}[+]{nu} user uploads data : \n")
            for index, post in enumerate(
                self.p_data["edge_owner_to_timeline_media"]["edges"]
            ):
                # GET PICTURE URL AND SHORTEN IT
                print(
                    f"{gr}picture : {nu}{extra.tiny_url(str(post['node']['thumbnail_resources'][0]['src']))}"
                )
                # IF PIC HAS NO CAPTIONS > SKIP / PRINT
                try:
                    print(
                        f"{gr}Caption : {nu}{post['node']['edge_media_to_caption']['edges'][0]['node']['text']}"
                    )
                except IndexError:
                    pass
                posts[index] = {
                    "comments": str(post["node"]["edge_media_to_comment"]["count"]),
                    "comments disabled": str(post["node"]["comments_disabled"]),
                    "timestamp": str(post["node"]["taken_at_timestamp"]),
                    "likes": str(post["node"]["edge_liked_by"]["count"]),
                    "location": str(post["node"]["location"]),
                    "accessability caption": str(post["node"]["accessibility_caption"]),
                }

                for key, value in posts[index].items():
                    print(f"{gr}{key} : {nu}{value}")
                print("")

    def most_common_tags(self):
        print(f"{gr}[+]{nu} user uploads tags : \n")
        for key, value in collections.Counter(self.tags).most_common():
            print(f"{gr}{key} : {nu}{value}")

    def save_data(self):
        self.make_dir()
        # DOWNLOAD PROFILE PICTURE
        with open(f"profile_pic.jpg", "wb") as f:
            time.sleep(1)
            r = requests.get(
                self.output["profile pic url  "],
                headers={"User-Agent": random.choice(useragent)},
            )
            f.write(r.content)
        print(f"{gr}[+]{nu} saved pic to {os.getcwd()}/profile_pic.jpg")

        # SAVES PROFILE DATA TO TEXT FILE
        self.output_data = {
            "username": str(self.p_data["username"]),
            "name": str(self.p_data["full_name"]),
            "url": str(f"instagram.com/{self.p_data['username']}"),
            "followers": str(self.p_data["edge_followed_by"]["count"]),
            "following": str(self.p_data["edge_follow"]["count"]),
            "posts": str(self.p_data["edge_owner_to_timeline_media"]["count"]),
            "bio": str(self.p_data["biography"]),
            "external url": str(self.p_data["external_url"]),
            "private": str(self.p_data["is_private"]),
            "verified": str(self.p_data["is_verified"]),
            "profile pic url": extra.tiny_url(str(self.p_data["profile_pic_url_hd"])),
            "business account": str(self.p_data["is_business_account"]),
            "connected to fb": str(self.p_data["connected_fb_page"]),
            "joined recently": str(self.p_data["is_joined_recently"]),
            "business category": str(self.p_data["business_category_name"]),
        }
        with open(f"profile_data.txt", "w") as f:
            f.write(json.dumps(self.output_data))
        print(f"{gr}[+]{nu} saved data to {os.getcwd()}/profile_data.txt")

        # SAVES INFORMATION
        posts = {}
        for index, post in enumerate(
            self.p_data["edge_owner_to_timeline_media"]["edges"]
        ):
            posts[index] = {
                "comments": str(post["node"]["edge_media_to_comment"]["count"]),
                "comments disabled": str(post["node"]["comments_disabled"]),
                "timestamp": str(post["node"]["taken_at_timestamp"]),
                "likes": str(post["node"]["edge_liked_by"]["count"]),
                "location": str(post["node"]["location"]),
                "accessability caption": str(post["node"]["accessibility_caption"]),
                "picture": extra.tiny_url(
                    str(post["node"]["thumbnail_resources"][0]["src"])
                ),
            }

            try:
                post[index]["caption"] = str(
                    post["node"]["edge_media_to_caption"]["edges"][0]["node"]["text"]
                )
            except KeyError:
                pass

        with open(f"posts_data.txt", "w") as f:
            f.write(json.dumps(posts))
        print(f"{gr}[+]{nu} saved post info to {os.getcwd()}/posts_data.txt")

        # SAVES TAGS
        with open(f"tags.txt", "w") as f:
            f.write(json.dumps(tag_lis))
        print(f"{gr}[+]{nu} saved tags to {os.getcwd()}/posts_data.txt\n")