-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
959 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Contains code from my coursework. Each folder is a separate project and contains a README file describing it. For a detailed description of any project, navigate into its folder. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
import base64 | ||
import os | ||
import re | ||
import sys | ||
import tempfile | ||
import logging | ||
import json | ||
import urllib | ||
import cStringIO | ||
|
||
from googleapiclient.discovery import build | ||
import googleapiclient.http as e | ||
from googleapiclient import http, errors | ||
from oauth2client.client import GoogleCredentials | ||
from googleapiclient import discovery | ||
from logger import Logger | ||
from foursquare import FourSquare | ||
|
||
|
||
# Discovery-document URL template passed to build() as discoveryServiceUrl
# by create_api_client.
DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'
# Local JPEG path; not referenced by the code visible in this file.
# NOTE(review): presumably used by the worker for the "no result" image -- confirm.
TMP_SAVE_IMG = '/tmp/output.jpeg'
|
||
|
||
def create_api_client(which_api, version):
    """Build a Google API client for *which_api* at *version*.

    The client authenticates with the application default credentials and
    resolves its discovery document through DISCOVERY_URL.
    """
    return build(
        which_api,
        version,
        credentials=GoogleCredentials.get_application_default(),
        discoveryServiceUrl=DISCOVERY_URL,
    )
|
||
def create_api_client_withKey(which_api, version):
    """Build a Google API client that authenticates with a developer key.

    The key is read from the CUSTOM_SEARCH_API_KEY environment variable when
    it is set; otherwise the key that has always been embedded here is used,
    so existing deployments keep working unchanged.

    Args:
        which_api: Name of the API to build a client for.
        version: Version string of that API.

    Returns:
        The client object returned by build().
    """
    # The original also fetched application default credentials here and then
    # discarded them; a key-authenticated client does not use them, and the
    # lookup could fail on hosts that have no default credentials configured.

    # NOTE(security): this fallback key is committed to source control. It
    # should be rotated and supplied only through the environment.
    developer_key = os.environ.get(
        "CUSTOM_SEARCH_API_KEY",
        "AIzaSyCZGWBJttrKhTjcy6tRjSaJA52zctUb1e4")
    return build(which_api, version, developerKey=developer_key)
|
||
def create_gcs_client():
    """Build a Cloud Storage ('storage', 'v1') client.

    The client authenticates with the application default credentials.
    """
    return build(
        'storage',
        'v1',
        credentials=GoogleCredentials.get_application_default(),
    )
|
||
|
||
class Process(object):
    """Process one Cloud Storage object and write results back to its bucket.

    Every upload made by this class is stored in the same bucket under the
    name "<prefix>_output/<stem>.<ext>", where <prefix> is the text before the
    first "_" in the object's folder name and <stem> is the text before the
    first "." in the object's file name.
    """

    def __init__(self, bucket, filename, filetype, project_id):
        """Record the request parameters and build the API clients.

        Args:
            bucket: Name of the bucket that holds the object.
            filename: Object name of the form "<folder>/<file>".
            filetype: Stored on the instance; not read by this class.
            project_id: Stored on the instance; not read by this class.

        Raises:
            IndexError: If *filename* contains no "/".
        """
        print("FUNCTION: constructor")
        Logger.log_writer("FUNCTION: constructor")
        print("bucket:{0}".format(str(bucket)))
        print("filename:{0}".format(str(filename)))
        print("filetype:{0}".format(str(filetype)))
        print("project_id:{0}".format(str(project_id)))

        self.project_id = project_id
        self.bucket = bucket
        self.filename = filename
        self.filetype = filetype
        self.vision_client = create_api_client('vision', 'v1')
        self.gcs_client = create_gcs_client()
        self.cse_client = create_api_client_withKey('customsearch', 'v1')

        path_parts = self.filename.split("/")
        self.uploadFileName = path_parts[1].split(".")[0]
        self.uploadFolderName = path_parts[0].split("_")[0]

    @staticmethod
    def _link_extension(link):
        """Return the file extension of *link*, defaulting to "jpg".

        The query string and fragment are dropped first, and only the last
        path segment is inspected, so "http://h/a.jpg?w=200" yields "jpg"
        rather than "jpg?w=200". Links whose last segment has no extension
        yield "jpg", matching the "image/jpeg" type used for the upload.
        """
        path = link.split("#", 1)[0].split("?", 1)[0]
        leaf = path.rsplit("/", 1)[-1]
        _stem, dot, ext = leaf.rpartition(".")
        return ext if dot and ext else "jpg"

    def _upload_stream(self, extension, stream, mimetype):
        """Insert *stream* into the bucket as "<prefix>_output/<stem>.<extension>"."""
        body = {
            'name': self.uploadFolderName + "_output" + "/" + self.uploadFileName + "." + extension,
        }
        req = self.gcs_client.objects().insert(
            bucket=self.bucket,
            body=body,
            media_body=e.MediaIoBaseUpload(stream, mimetype))
        return req.execute()

    def getFirstImage(self, query):
        """Run an image search for *query* and upload the first hit.

        Raises:
            KeyError / IndexError: If the search response carries no
                'items' entry or the entry is empty.
        """
        print("FUNCTION: getFirstImage")
        Logger.log_writer("FUNCTION: getFirstImage")
        res = self.cse_client.cse().list(
            q=str(query),
            cx='002657803801302330803:gatc1h4ugpi',
            num=1,
            searchType="image",
            imgSize="medium",
            imgType="photo",
            fileType="jpg",
        ).execute()
        first_image_link = res['items'][0]['link']
        print(first_image_link)
        Logger.log_writer("Image Link:{}".format(first_image_link))
        print(self.upload_image(first_image_link))

    def upload_image(self, link):
        """Download the image at *link* and upload it to the output folder.

        Returns:
            The response of the Cloud Storage insert request.
        """
        print("FUNCTION: upload_image")
        Logger.log_writer("FUNCTION: upload_image")
        ext = self._link_extension(link)
        remote = urllib.urlopen(link)
        try:
            stream = cStringIO.StringIO(remote.read())
        finally:
            # Release the HTTP connection even when the read fails.
            remote.close()
        return self._upload_stream(str(ext), stream, "image/jpeg")

    def get_object(self):
        """Download this object and return its first two comma-separated fields.

        Returns:
            A list with at most two strings.
            NOTE(review): presumably latitude and longitude, given the
            variable name "coordinates" in the original -- confirm with caller.
        """
        print("FUNCTION: get_object")
        Logger.log_writer("FUNCTION: get_object")
        req = self.gcs_client.objects().get_media(bucket=self.bucket, object=self.filename)
        out_file = e.BytesIO()
        try:
            downloader = e.MediaIoBaseDownload(out_file, req)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
                print("Download {}%.".format(int(status.progress() * 100)))
            return out_file.getvalue().split(",")[:2]
        finally:
            out_file.close()

    def upload_local_image(self, filename):
        """Upload the local file *filename* as "<prefix>_output/<stem>.jpg".

        Returns:
            The response of the Cloud Storage insert request.
        """
        print("FUNCTION: Process.upload_local_image")
        Logger.log_writer("FUNCTION: Process.upload_local_image")
        with open(filename, 'rb') as f:
            # Execute inside the with-block: the upload reads from the open file.
            return self._upload_stream("jpg", f, "image/jpeg")

    def find_suggestions(self, latitude, longitude):
        """Query FourSquare for the given position and upload the result as XML."""
        print("FUNCTION: Process.find_suggestions")
        Logger.log_writer("FUNCTION: Process.find_suggestions")
        foursquareAPI = FourSquare(latitude, longitude)
        output = foursquareAPI.req()
        print(output)
        print(self.upload_XMLobject(output))

    def upload_XMLobject(self, content):
        """Serialise *content* and upload it as "<prefix>_output/<stem>.xml".

        Args:
            content: Object exposing write(stream).
                NOTE(review): presumably an ElementTree -- confirm.

        Returns:
            The response of the Cloud Storage insert request.
        """
        print("FUNCTION: upload_XMLobject")
        Logger.log_writer("FUNCTION: upload_XMLobject")
        stream = e.BytesIO()
        content.write(stream)
        return self._upload_stream("xml", stream, 'text/xml')

    def upload_object(self, content):
        """Upload the lines in *content* as "<prefix>_output/<stem>.txt".

        Each item of *content* is written followed by a newline.

        Returns:
            The response of the Cloud Storage insert request.
        """
        print("FUNCTION: upload_object")
        Logger.log_writer("FUNCTION: upload_object")
        stream = e.BytesIO()
        for line in content:
            stream.write(line + '\n')
        return self._upload_stream("txt", stream, 'text/plain')

    def img_to_text(self):
        """Run Vision TEXT_DETECTION on this object.

        Returns:
            A dict mapping self.filename to the list of detected text lines
            (an empty list when no text was found); an empty dict when the
            API response has no 'responses' entry or only error responses;
            None when the request raised, after the problem has been printed
            and logged.
        """
        print("FUNCTION: img_to_text")
        Logger.log_writer("FUNCTION: img_to_text")

        vision_body = {
            "features": [{
                "type": "TEXT_DETECTION",
                "maxResults": 10
            }],
            "image": {
                "source": {
                    "gcsImageUri": "gs://{0}/{1}".format(self.bucket, self.filename)
                }
            }
        }
        Logger.log_writer("Request is: {0}".format(vision_body))
        try:
            vision_request = self.vision_client.images().annotate(body={'requests': vision_body})
            vision_response = vision_request.execute()
            Logger.log_writer("Response is: {0}".format(vision_response))
            if 'responses' not in vision_response:
                return {}
            text_response = {}
            for response in vision_response['responses']:
                if 'error' in response:
                    error = response['error']
                    print("API Error for %s: %s" % (
                        self.filename,
                        error['message'] if 'message' in error else ''))
                    continue
                if 'textAnnotations' in response:
                    text_response[self.filename] = response['textAnnotations'][0]['description'].split("\n")
                else:
                    text_response[self.filename] = []
            Logger.log_writer("Returning from Process: {0}".format(text_response))
            return text_response
        except Exception as err:
            # Named "err" so the module alias "e" (googleapiclient.http) is
            # not shadowed inside this method.
            print("Problem with file {0} with {1}".format(self.filename, str(err)))
            Logger.log_writer("Problem with file {0} with {1}".format(self.filename, str(err)))
            return None
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
Resource files | ||
|
||
Version 5: | ||
1. User uploads cropped Image to source bucket to which watcher is connected | ||
2. Four buckets - Photo_input, Photo_output,Location_input,Location_output | ||
3. No result handled | ||
4. No extracted text case handled | ||
5. Encoding errors handled | ||
6. Image (Medium,Photo) also downloaded at bucket | ||
7. Only Jpg,Png pics downloaded | ||
8. Suggestions handled by FourSquare | ||
9. Suggestions given in XML format | ||
10. timestamp.jpg uploaded for no result case | ||
|
||
Details are gathered from Wikipedia. | ||
|
||
Commands: | ||
--------- | ||
1. gcloud app deploy 01_watcher/app.yaml --project bookshelf-164400 | ||
2. gsutil notification watchbucket https://bookshelf-164400.appspot.com/media-processing-hook gs://audio-mediap-dropzone | ||
3. gsutil -m cp ~/Pictures/test.png gs://audio-mediap-dropzone | ||
4. gsutil notification stopchannel cb8c5cfe-36f2-4295-8ac4-e009c1b3b91d 3e1NjKnzEL_8hG4sMeZkNTh8LJE | ||
5. python worker.py --subscription=mediap-sub |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#! /bin/bash
# Provision a worker host: fetch the worker sources into /tmp/food, install
# their dependencies into a virtualenv, and start the worker.

WORK_DIR=/tmp/food
PROJECT_RES='https://raw.githubusercontent.com/jatsakthi/Cloud-Computing/master/Foodie%20Friend/res'
UPSTREAM_WORKERS='https://raw.githubusercontent.com/GoogleCloudPlatform/pubsub-media-processing/master/02_workers'

# -p keeps a re-run from failing when the directory already exists.
sudo mkdir -p "$WORK_DIR"
# Stop if the directory is unusable; otherwise everything below would be
# downloaded into, and run from, whatever the current directory happens to be.
cd "$WORK_DIR" || exit 1

# -O overwrites an existing copy; without it a re-run would save "<name>.1"
# and keep running the stale file.
# NOTE(review): Process.py imports a "foursquare" module that is not fetched
# here -- confirm it is provided through requirements.txt.
for name in crawl.py browser.py search.py Process.py requirements.txt worker.py; do
    sudo wget -q -O "$name" "$PROJECT_RES/$name"
done
for name in recurror.py logger.py; do
    sudo wget -q -O "$name" "$UPSTREAM_WORKERS/$name"
done

sudo curl https://bootstrap.pypa.io/get-pip.py | sudo python
sudo pip install virtualenv
sudo virtualenv venv
source venv/bin/activate
sudo venv/bin/pip install -r requirements.txt
python worker.py --subscription=mediap-sub --destination=food-output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/usr/bin/python | ||
# | ||
# Peteris Krumins ([email protected]) | ||
# http://www.catonmat.net -- good coders code, great reuse | ||
# | ||
# http://www.catonmat.net/blog/python-library-for-google-search/ | ||
# | ||
# Code is licensed under MIT license. | ||
# | ||
|
||
import random | ||
import socket | ||
import urllib | ||
import urllib2 | ||
import httplib | ||
|
||
# User-Agent strings offered to Browser: BROWSERS[0] is the default value of
# its user_agent parameter, and set_random_user_agent() picks from the whole
# tuple.
BROWSERS = (
    # Top most popular browsers in my access.log on 2009.02.12
    # tail -50000 access.log |
    #  awk -F\" '{B[$6]++} END { for (b in B) { print B[b] ": " b } }' |
    #  sort -rn |
    #  head -20
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.6) Gecko/2009011912 Firefox/3.0.6',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.6) Gecko/2009020911 Ubuntu/8.10 (intrepid) Firefox/3.0.6',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.48 Safari/525.19',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
    'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.6) Gecko/2009020911 Ubuntu/8.10 (intrepid) Firefox/3.0.6',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.5) Gecko/2008121621 Ubuntu/8.04 (hardy) Firefox/3.0.5',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
)

# Socket timeout, passed to sock.settimeout() in PoolHTTPConnection.connect.
TIMEOUT = 5  # socket timeout
|
||
class BrowserError(Exception):
    """Raised when fetching a URL fails.

    Attributes:
        url: The URL that was being fetched.
        error: A description of the failure (a string or the underlying
            exception object, depending on the raise site).
    """

    def __init__(self, url, error):
        # Hand both values to Exception so that args, repr() and tracebacks
        # carry them; previously args was empty and str(err) was ''.
        super(BrowserError, self).__init__(url, error)
        self.url = url
        self.error = error

    def __str__(self):
        return "%s: %s" % (self.url, self.error)
|
||
class PoolHTTPConnection(httplib.HTTPConnection):
    """HTTPConnection variant that applies TIMEOUT to its socket before connecting."""

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Holds the reason to report if no address works: the default text
        # until a connection attempt fails, then the latest socket.error.
        failure = "getaddrinfo returns an empty list"
        candidates = socket.getaddrinfo(self.host, self.port, 0,
                                        socket.SOCK_STREAM)
        for family, kind, protocol, _canonname, address in candidates:
            try:
                self.sock = socket.socket(family, kind, protocol)
                if self.debuglevel > 0:
                    print("connect: (%s, %s)" % (self.host, self.port))
                self.sock.settimeout(TIMEOUT)
                self.sock.connect(address)
            except socket.error as exc:
                failure = exc
                if self.debuglevel > 0:
                    print("connect fail: %s" % ((self.host, self.port),))
                if self.sock:
                    self.sock.close()
                self.sock = None
            else:
                # First address that connects wins.
                break
        if not self.sock:
            if isinstance(failure, socket.error):
                raise failure
            raise socket.error(failure)
|
||
class PoolHTTPHandler(urllib2.HTTPHandler):
    """urllib2 HTTP handler that opens connections through PoolHTTPConnection."""

    def http_open(self, req):
        """Open *req* via the inherited do_open, using PoolHTTPConnection as the connection class."""
        return self.do_open(PoolHTTPConnection, req)
|
||
class Browser(object):
    """Small HTTP client that sends browser-like request headers."""

    def __init__(self, user_agent=BROWSERS[0], debug=False, use_pool=False):
        """Set up the request headers.

        Args:
            user_agent: Value of the User-Agent header.
            debug: Stored on the instance; not read by this class.
            use_pool: Accepted for interface compatibility; not read by this
                class (get_page always uses PoolHTTPHandler).
        """
        self.headers = {
            'User-Agent': user_agent,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5'
        }
        self.debug = debug

    def get_page(self, url, data=None):
        """Fetch *url* and return the response body.

        Args:
            url: Address to request.
            data: Optional mapping or sequence of pairs; when truthy it is
                url-encoded and sent as the request body.

        Returns:
            The raw response body.

        Raises:
            BrowserError: On any HTTP, URL, socket or otherwise unexpected
                failure. KeyboardInterrupt and SystemExit are not converted.
        """
        opener = urllib2.build_opener(PoolHTTPHandler)
        if data:
            data = urllib.urlencode(data)
        request = urllib2.Request(url, data, self.headers)
        # socket.sslerror is not present on every Python build; fall back to
        # socket.error so the except clause below can always be evaluated.
        socket_errors = (socket.error, getattr(socket, 'sslerror', socket.error))
        try:
            response = opener.open(request)
            try:
                return response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
        except (urllib2.HTTPError, urllib2.URLError) as e:
            raise BrowserError(url, str(e))
        except socket.timeout:
            # Must precede the socket.error clause: socket.timeout is a
            # subclass of socket.error, so listed after it this branch could
            # never run.
            raise BrowserError(url, "timeout")
        except socket_errors as msg:
            raise BrowserError(url, msg)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit propagate untouched.
            raise BrowserError(url, "unknown error")

    def set_random_user_agent(self):
        """Switch the User-Agent header to a random BROWSERS entry and return it."""
        self.headers['User-Agent'] = random.choice(BROWSERS)
        return self.headers['User-Agent']
|
Oops, something went wrong.