Skip to content

Commit c3f61db

Browse files
author
Ilter
committed
Merge pull request #44 from autolab/persistent_job_queue_prodcom
Persistent job queue prodcom
2 parents 0806f99 + 460eea4 commit c3f61db

File tree

1,061 files changed

+869
-219276
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,061 files changed

+869
-219276
lines changed

.gitignore

+9
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,12 @@ scripts/*.sh
88
vmms/id_rsa*
99
# config
1010
config.py
11+
12+
# Virtualenv
13+
.Python
14+
bin
15+
lib
16+
include
17+
18+
# Mac OS X custom attribute files
19+
.DS_Store

config.py config.template.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# config.py - Global configuration constants and runtime info
33
#
44

5-
from vmms.localSSH import *
65
import logging
76

87
# Config - defines
@@ -22,18 +21,17 @@ class Config:
2221
PORT = 3000
2322

2423
# Log file. Setting this to None sends the server output to stdout
25-
LOGFILE = None
24+
LOGFILE = "tango3.log"
2625

2726
# Logging level
28-
LOGLEVEL = logging.INFO
27+
LOGLEVEL = logging.DEBUG
2928

3029
# Courselabs directory. Must be created before starting Tango
31-
COURSELABS = ""
30+
COURSELABS = "<INSERT_PATH_HERE>"
3231

3332
# VMMS to use. Must be set to a VMMS implemented in vmms/ before
34-
# starting Tango
33+
# starting Tango. Options are: "localSSH", "tashiSSH", "ec2SSH"
3534
VMMS_NAME = "localSSH"
36-
VMMS = LocalSSH()
3735

3836
#####
3937
# Part 2: Constants that shouldn't need to change very often.
@@ -88,7 +86,7 @@ class Config:
8886
POOL_SIZE = 2
8987

9088
# Path for tashi images
91-
TASHI_IMAGE_PATH = ""
89+
TASHI_IMAGE_PATH = "/raid/tashi/images/"
9290

9391
# Optionally log finer-grained timing information
9492
LOG_TIMING = True
@@ -109,7 +107,14 @@ class Config:
109107
copyout_errors=0
110108

111109
######
112-
# Part 4: EC2 Constants
110+
# Part 4: Settings for shared memory
111+
#
112+
USE_REDIS = False
113+
REDIS_HOSTNAME = "127.0.0.1"
114+
REDIS_PORT = 6379
115+
116+
######
117+
# Part 5: EC2 Constants
113118
#
114119
EC2_REGION = ''
115120
DEFAULT_AMI = ''

jobManager.py

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#
2+
# JobManager - Thread that assigns jobs to worker threads
3+
#
4+
# The job manager thread wakes up every so often, scans the job list
5+
# for new unassigned jobs, and tries to assign them.
6+
#
7+
# Assigning a job will try to get a preallocated VM that is ready,
8+
# otherwise will pass 'None' as the preallocated vm. A worker thread
9+
# is launched that will handle things from here on. If anything goes
10+
# wrong, the job is made dead with the error.
11+
#
12+
import time, threading, logging
13+
14+
from config import Config
15+
from worker import Worker
16+
17+
from jobQueue import JobQueue
18+
from preallocator import Preallocator
19+
20+
class JobManager:
21+
22+
def __init__(self, queue, vmms, preallocator):
23+
self.daemon = True
24+
self.jobQueue = queue
25+
self.vmms = vmms
26+
self.preallocator = preallocator
27+
self.log = logging.getLogger("JobManager")
28+
threading.Thread(target=self.__manage).start()
29+
30+
31+
def __manage(self):
32+
while True:
33+
if Config.REUSE_VMS:
34+
id,vm = self.jobQueue.getNextPendingJobReuse()
35+
else:
36+
id = self.jobQueue.getNextPendingJob()
37+
38+
if id:
39+
job = self.jobQueue.get(id)
40+
try:
41+
# Mark the job assigned
42+
self.jobQueue.assignJob(job.id)
43+
44+
# Try to find a vm on the free list and allocate it to
45+
# the worker if successful.
46+
if Config.REUSE_VMS:
47+
preVM = vm
48+
else:
49+
preVM = self.preallocator.allocVM(job.vm.name)
50+
51+
# Now dispatch the job to a worker
52+
self.log.info("Dispatched job %s:%d to %s [try %d]" %
53+
(job.name, job.id, preVM.name, job.retries))
54+
job.appendTrace("%s|Dispatched job %s:%d [try %d]" %
55+
(time.ctime(time.time()+time.timezone), job.name, job.id,
56+
job.retries))
57+
vmms = self.vmms[job.vm.vmms] # Create new vmms object
58+
Worker(job, vmms, self.jobQueue, self.preallocator, preVM).start()
59+
60+
except Exception, err:
61+
self.jobQueue.makeDead(job.id, str(err))
62+
63+
64+
# Sleep for a bit and then check again
65+
time.sleep(Config.DISPATCH_PERIOD)
66+
67+
68+
if __name__ == "__main__":
69+
70+
if not Config.USE_REDIS:
71+
print("You need to have Redis running to be able to initiate stand-alone\
72+
JobManager")
73+
else:
74+
vmms = None
75+
76+
if Config.VMMS_NAME == "localSSH":
77+
from vmms.localSSH import LocalSSH
78+
vmms = LocalSSH()
79+
elif Config.VMMS_NAME == "tashiSSH":
80+
from vmms.tashiSSH import TashiSSH
81+
vmms = TashiSSH()
82+
elif Config.VMMS_NAME == "ec2SSH":
83+
from vmms.ec2SSH import Ec2SSH
84+
vmms = Ec2SSH()
85+
86+
vmms = {Config.VMMS_NAME: vmms}
87+
preallocator = Preallocator(vmms)
88+
queue = JobQueue(preallocator)
89+
90+
JobManager(queue, vmms, preallocator)
91+
92+
print("Starting the stand-alone Tango JobManager")
93+

0 commit comments

Comments
 (0)