Added Cloud-Computing
the-sakthi committed Mar 10, 2019
2 parents 7351149 + dbedb5b commit 70b43a1
Showing 13 changed files with 959 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cloud-Computing/Foodie Friend/README.md
@@ -0,0 +1 @@
Contains code from my coursework. Each folder represents a project and contains a README file describing it; for a detailed description of any project, navigate into its folder.
226 changes: 226 additions & 0 deletions Cloud-Computing/Foodie Friend/res/Process.py
@@ -0,0 +1,226 @@
import base64
import os
import re
import sys
import tempfile
import logging
import json
import urllib
import cStringIO
import io

from googleapiclient.discovery import build
import googleapiclient.http as e
from googleapiclient import http, errors
from oauth2client.client import GoogleCredentials
from googleapiclient import discovery
from logger import Logger
from foursquare import FourSquare


DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'
TMP_SAVE_IMG = '/tmp/output.jpeg'


def create_api_client(which_api, version):
"""Returns a Cloud Logging service client for calling the API."""
credentials = GoogleCredentials.get_application_default()
return build(which_api, version, credentials=credentials, discoveryServiceUrl=DISCOVERY_URL)

def create_api_client_withKey(which_api, version):
"""Returns a Cloud Logging service client for calling the API."""
credentials = GoogleCredentials.get_application_default()
return build(which_api, version, developerKey="AIzaSyCZGWBJttrKhTjcy6tRjSaJA52zctUb1e4")

def create_gcs_client():
"""Returns a Cloud PubSub service client for calling the API."""
credentials = GoogleCredentials.get_application_default()
return build('storage', 'v1', credentials=credentials)


class Process(object):
def __init__(self, bucket, filename, filetype, project_id):
print "FUNCTION: constructor"
Logger.log_writer("FUNCTION: constructor")
print ("bucket:{0}".format(str(bucket)))
print ("filename:{0}".format(str(filename)))
print ("filetype:{0}".format(str(filetype)))
print ("project_id:{0}".format(str(project_id)))

self.project_id = project_id
self.bucket = bucket
self.filename = filename
self.filetype = filetype
self.vision_client = create_api_client('vision', 'v1')
self.gcs_client = create_gcs_client()
self.cse_client = create_api_client_withKey('customsearch','v1')
self.uploadFileName = self.filename.split("/")[1]
self.uploadFolderName = self.filename.split("/")[0].split("_")[0]
self.uploadFileName = self.uploadFileName.split(".")[0]

def getFirstImage(self,query):
print "FUNCTION: getFirstImage"
Logger.log_writer("FUNCTION: getFirstImage")
#resultFound = False
#types = ('jpg')
#current = 0
#while current<=0 and resultFound == False:
res = self.cse_client.cse().list(
q=str(query),
cx='002657803801302330803:gatc1h4ugpi',
num=1,
searchType="image",
imgSize="medium",
imgType="photo",
fileType="jpg",
).execute()
'''
if len(res['items'])==1:
resultFound = True
else:
current += 1
'''
first_image_link = res['items'][0]['link']
print first_image_link
Logger.log_writer("Image Link:{}".format(first_image_link))
print self.upload_image(first_image_link)

def upload_image(self,link):
print "FUNCTION: upload_image"
Logger.log_writer("FUNCTION: upload_image")
parts = link.split(".")
ext = parts[len(parts)-1]
body = {
'name': self.uploadFolderName+"_output"+"/"+self.uploadFileName+"."+str(ext),
}

stream = cStringIO.StringIO(urllib.urlopen(link).read())
req = self.gcs_client.objects().insert(bucket=self.bucket,body=body,media_body=e.MediaIoBaseUpload(stream, "image/jpeg"))
resp = req.execute()
#Logger.log_writer("Response:{}".format(resp))
return resp
def get_object(self):
print "FUNCTION: get_object"
Logger.log_writer("FUNCTION: get_object")
req = self.gcs_client.objects().get_media(bucket=self.bucket, object=self.filename)
        out_file = io.BytesIO()  # in-memory buffer for the download (io.BytesIO, not googleapiclient.http)
downloader = e.MediaIoBaseDownload(out_file, req)

done = False
while done is False:
status, done = downloader.next_chunk()
print("Download {}%.".format(int(status.progress() * 100)))
coordinates = out_file.getvalue().split(",")[:2]
out_file.close()
return coordinates
def upload_local_image(self,filename):
print "FUNCTION: Process.upload_local_image"
Logger.log_writer("FUNCTION: Process.upload_local_image")

body = {
'name': self.uploadFolderName+"_output"+"/"+self.uploadFileName+".jpg",
}

with open(filename, 'rb') as f:
req = self.gcs_client.objects().insert(
bucket=self.bucket, body=body,media_body=e.MediaIoBaseUpload(
f, "image/jpeg"))
resp = req.execute()
return resp

def find_suggestions(self,latitude,longitude):
print "FUNCTION: Process.find_suggestions"
Logger.log_writer("FUNCTION: Process.find_suggestions")
foursquareAPI = FourSquare(latitude,longitude)
output = foursquareAPI.req()
print output
print self.upload_XMLobject(output)

def upload_XMLobject(self,content):
print "FUNCTION: upload_XMLobject"
Logger.log_writer("FUNCTION: upload_XMLobject")
body = {
'name': self.uploadFolderName+"_output"+"/"+self.uploadFileName+".xml",
}
        stream = io.BytesIO()  # in-memory buffer for the XML payload
content.write(stream)
req = self.gcs_client.objects().insert(bucket=self.bucket,body=body,media_body=e.MediaIoBaseUpload(stream, 'text/xml'))
resp = req.execute()
return resp

def upload_object(self,content):
print "FUNCTION: upload_object"
Logger.log_writer("FUNCTION: upload_object")
body = {
'name': self.uploadFolderName+"_output"+"/"+self.uploadFileName+".txt",
}

        stream = io.BytesIO()  # in-memory buffer for the text payload
for line in content:
stream.write(line+'\n')

'''
if readers or owners:
body['acl'] = []
for r in readers:
body['acl'].append({
'entity': 'user-%s' % r,
'role': 'READER',
'email': r
})
for o in owners:
body['acl'].append({
'entity': 'user-%s' % o,
'role': 'OWNER',
'email': o
})
'''
req = self.gcs_client.objects().insert(bucket=self.bucket,body=body,media_body=e.MediaIoBaseUpload(stream, 'text/plain'))
resp = req.execute()
return resp

def img_to_text(self):
print "FUNCTION: img_to_text"
Logger.log_writer("FUNCTION: img_to_text")

        vision_body = {
"features": [{
"type": "TEXT_DETECTION",
"maxResults": 10
}],
"image": {
"source": {
"gcsImageUri": "gs://{0}/{1}".format(self.bucket, self.filename)
}
}
}
        #Logger.log_writer("{0} process stops".format(self.filename))
Logger.log_writer("Request is: {0}".format(vision_body))
try:
vision_request = self.vision_client.images().annotate(body={'requests': vision_body})
vision_response = vision_request.execute()
#print(json.dumps(vision_response,indent=2))
Logger.log_writer("Response is: {0}".format(vision_response))
if 'responses' not in vision_response:
return {}
text_response = {}
for response in vision_response['responses']:
if 'error' in response:
print("API Error for %s: %s" % (
self.filename,
response['error']['message']
if 'message' in response['error']
else ''))
continue
if 'textAnnotations' in response:
text_response[self.filename] = response['textAnnotations'][0]['description'].split("\n")
else:
text_response[self.filename] = []
Logger.log_writer("Returning from Process: {0}".format(text_response))
return text_response
except Exception, e:
print "Problem with file {0} with {1}".format(self.filename, str(e))
Logger.log_writer("Problem with file {0} with {1}".format(self.filename, str(e)))
pass
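
For orientation, a minimal sketch of how a worker might drive the Process class above. The notification attribute names (bucketId, objectId, contentType), the Photo_/Location_ folder dispatch, and the project id are assumptions drawn from the README and commands later in this commit, not the repo's actual worker.py.

# Hypothetical driver sketch; attribute names and dispatch logic are assumptions.
from Process import Process

def handle_notification(attrs):
    bucket = attrs['bucketId']                     # assumed attribute name
    filename = attrs['objectId']                   # e.g. "Photo_123/menu.jpg"
    filetype = attrs.get('contentType', 'image/jpeg')
    project_id = 'bookshelf-164400'                # project id taken from the README commands

    p = Process(bucket, filename, filetype, project_id)
    folder = filename.split("/")[0]
    if folder.startswith("Photo"):
        # OCR the uploaded photo, then fetch an illustrative image for the first text line.
        text = p.img_to_text()
        lines = (text or {}).get(filename, [])
        if lines:
            p.getFirstImage(lines[0])
    elif folder.startswith("Location"):
        # The uploaded object is expected to hold "lat,lng"; ask Foursquare for suggestions.
        latitude, longitude = p.get_object()
        p.find_suggestions(latitude, longitude)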


23 changes: 23 additions & 0 deletions Cloud-Computing/Foodie Friend/res/README.md
@@ -0,0 +1,23 @@
Resource files

Version 5:
1. User uploads a cropped image to the source bucket to which the watcher is connected
2. Four buckets: Photo_input, Photo_output, Location_input, Location_output
3. No-result case handled
4. No-extracted-text case handled
5. Encoding errors handled
6. Image (medium size, photo type) also downloaded to the bucket
7. Only JPG and PNG pictures downloaded
8. Suggestions handled by Foursquare (see the sketch after the commands below)
9. Suggestions given in XML format
10. timestamp.jpg uploaded for the no-result case

Details are gathered from Wikipedia.

Commands:
---------
1. gcloud app deploy 01_watcher/app.yaml --project=bookshelf-164400
2. gsutil notification watchbucket https://bookshelf-164400.appspot.com/media-processing-hook gs://audio-mediap-dropzone
3. gsutil -m cp ~/Pictures/test.png gs://audio-mediap-dropzone
4. gsutil notification stopchannel cb8c5cfe-36f2-4295-8ac4-e009c1b3b91d 3e1NjKnzEL_8hG4sMeZkNTh8LJE
5. python worker.py --subscription=mediap-sub
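
A minimal sketch of the kind of Foursquare lookup item 8 above refers to. The FourSquare helper class imported by Process.py is not part of this commit, so the endpoint parameters and the requests dependency here are assumptions, and CLIENT_ID / CLIENT_SECRET are placeholders.

# Hypothetical sketch of a Foursquare venue lookup by coordinates; the real
# FourSquare class in this project may build the request differently.
import requests

def venue_suggestions(latitude, longitude, limit=5):
    resp = requests.get(
        'https://api.foursquare.com/v2/venues/search',
        params={
            'll': '{0},{1}'.format(latitude, longitude),
            'limit': limit,
            'client_id': 'CLIENT_ID',          # placeholder credential
            'client_secret': 'CLIENT_SECRET',  # placeholder credential
            'v': '20170401',                   # version date required by the v2 API
        })
    resp.raise_for_status()
    return [venue['name'] for venue in resp.json()['response']['venues']]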
17 changes: 17 additions & 0 deletions Cloud-Computing/Foodie Friend/res/Startup Script
@@ -0,0 +1,17 @@
#! /bin/bash
sudo mkdir /tmp/food
cd /tmp/food
sudo wget -q https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res/crawl.py
sudo wget -q https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res/browser.py
sudo wget -q https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res/search.py
sudo wget -q https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res/Process.py
sudo wget -q https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res/requirements.txt
sudo wget -q https://raw.githubusercontent.com/GoogleCloudPlatform/pubsub-media-processing/master/02_workers/recurror.py
sudo wget -q https://raw.githubusercontent.com/GoogleCloudPlatform/pubsub-media-processing/master/02_workers/logger.py
sudo wget -q https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res/worker.py
sudo curl https://bootstrap.pypa.io/get-pip.py | sudo python
sudo pip install virtualenv
sudo virtualenv venv
source venv/bin/activate
sudo venv/bin/pip install -r requirements.txt
python worker.py --subscription=mediap-sub --destination=food-output
105 changes: 105 additions & 0 deletions Cloud-Computing/Foodie Friend/res/browser.py
@@ -0,0 +1,105 @@
#!/usr/bin/python
#
# Peteris Krumins ([email protected])
# http://www.catonmat.net -- good coders code, great reuse
#
# http://www.catonmat.net/blog/python-library-for-google-search/
#
# Code is licensed under MIT license.
#

import random
import socket
import urllib
import urllib2
import httplib

BROWSERS = (
# Top most popular browsers in my access.log on 2009.02.12
# tail -50000 access.log |
# awk -F\" '{B[$6]++} END { for (b in B) { print B[b] ": " b } }' |
# sort -rn |
# head -20
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.6) Gecko/2009011912 Firefox/3.0.6',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.6) Gecko/2009020911 Ubuntu/8.10 (intrepid) Firefox/3.0.6',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.48 Safari/525.19',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.6) Gecko/2009020911 Ubuntu/8.10 (intrepid) Firefox/3.0.6',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.5) Gecko/2008121621 Ubuntu/8.04 (hardy) Firefox/3.0.5',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
)

TIMEOUT = 5 # socket timeout

class BrowserError(Exception):
def __init__(self, url, error):
self.url = url
self.error = error

class PoolHTTPConnection(httplib.HTTPConnection):
def connect(self):
"""Connect to the host and port specified in __init__."""
msg = "getaddrinfo returns an empty list"
for res in socket.getaddrinfo(self.host, self.port, 0,
socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
try:
self.sock = socket.socket(af, socktype, proto)
if self.debuglevel > 0:
print "connect: (%s, %s)" % (self.host, self.port)
self.sock.settimeout(TIMEOUT)
self.sock.connect(sa)
except socket.error, msg:
if self.debuglevel > 0:
print 'connect fail:', (self.host, self.port)
if self.sock:
self.sock.close()
self.sock = None
continue
break
if not self.sock:
raise socket.error, msg

class PoolHTTPHandler(urllib2.HTTPHandler):
def http_open(self, req):
return self.do_open(PoolHTTPConnection, req)

class Browser(object):
def __init__(self, user_agent=BROWSERS[0], debug=False, use_pool=False):
self.headers = {
'User-Agent': user_agent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5'
}
self.debug = debug

def get_page(self, url, data=None):
handlers = [PoolHTTPHandler]
opener = urllib2.build_opener(*handlers)
if data: data = urllib.urlencode(data)
request = urllib2.Request(url, data, self.headers)
try:
response = opener.open(request)
return response.read()
except (urllib2.HTTPError, urllib2.URLError), e:
raise BrowserError(url, str(e))
except (socket.error, socket.sslerror), msg:
raise BrowserError(url, msg)
except socket.timeout, e:
raise BrowserError(url, "timeout")
except KeyboardInterrupt:
raise
except:
raise BrowserError(url, "unknown error")

def set_random_user_agent(self):
self.headers['User-Agent'] = random.choice(BROWSERS)
return self.headers['User-Agent']
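
For reference, a minimal usage sketch of the Browser class above, written in the same Python 2 style as the file; the URL is an arbitrary example.

# Minimal usage sketch for Browser; the example URL is an assumption.
if __name__ == '__main__':
    b = Browser(debug=True)
    b.set_random_user_agent()              # rotate the User-Agent before fetching
    try:
        html = b.get_page('http://www.example.com/')
        print len(html), 'bytes fetched'
    except BrowserError, err:
        print 'fetch failed for', err.url, '->', err.error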
