diff --git a/Train/train_DeepCSV.py b/Train/train_DeepCSV.py
new file mode 100644
index 0000000..45b3ce4
--- /dev/null
+++ b/Train/train_DeepCSV.py
@@ -0,0 +1,43 @@
+
+#import sys
+#import tensorflow as tf
+#sys.modules["keras"] = tf.keras
+
+from DeepJetCore.training.training_base import training_base
+from DeepJetCore.modeltools import fixLayersContaining,printLayerInfosAndWeights
+
+
+# the training_base constructor also does all the command-line parsing
+train=training_base(testrun=False)
+
+newtraining = not train.modelSet()
+# if a model is already set, we are recovering a previous training
+if newtraining:
+    from models import model_deepCSV
+    
+    train.setModel(model_deepCSV,dropoutRate=0.1)
+    
+    #train.keras_model=fixLayersContaining(train.keras_model, 'regression', invert=False)
+    
+    train.train_data.maxFilesOpen=1
+ 
+train.compileModel(learningrate=0.003,
+                   loss='categorical_crossentropy',
+                   metrics=['accuracy'])
+    
+print(train.keras_model.summary())
+#printLayerInfosAndWeights(train.keras_model)
+
+model,history = train.trainModel(nepochs=50,
+                                 batchsize=10000, 
+                                 stop_patience=300, 
+                                 lr_factor=0.5, 
+                                 lr_patience=-1, 
+                                 lr_epsilon=0.0001, 
+                                 lr_cooldown=10, 
+                                 lr_minimum=0.00001,
+                                 verbose=1,checkperiod=1)
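+
+# Typical invocation (paths illustrative, not part of this repo):
+#   python train_DeepCSV.py /path/to/dataCollection.djcdc /path/to/training_output
+# training_base reads the input data collection and output directory from argv.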
diff --git a/env.sh b/env.sh
index 31e8538..61c11c8 100644
--- a/env.sh
+++ b/env.sh
@@ -7,6 +7,8 @@ export DEEPJETCORE_SUBPACKAGE=$DJSUBPACKAGE
 cd $DJSUBPACKAGE
 export PYTHONPATH=$DJSUBPACKAGE/modules:$PYTHONPATH
 export PYTHONPATH=$DJSUBPACKAGE/modules/datastructures:$PYTHONPATH
+export PYTHONPATH=$DJSUBPACKAGE/modules/models:$PYTHONPATH
+
 export PATH=$DJSUBPACKAGE/scripts:$PATH
 
 export LD_LIBRARY_PATH=$DJSUBPACKAGE/modules/compiled:$LD_LIBRARY_PATH
diff --git a/modules/datastructures/TrainData_deepFlavour.py b/modules/datastructures/TrainData_deepFlavour.py
index 1c37727..0b86328 100644
--- a/modules/datastructures/TrainData_deepFlavour.py
+++ b/modules/datastructures/TrainData_deepFlavour.py
@@ -2,7 +2,7 @@
 
 from DeepJetCore.TrainData import TrainData, fileTimeOut
 import numpy as np
-
+from argparse import ArgumentParser
 
 
 class TrainData_DF(TrainData):
@@ -10,6 +10,7 @@ def __init__(self):
 
         TrainData.__init__(self)
 
+        
         self.description = "DeepJet training datastructure"
         
         self.truth_branches = ['isB','isBB','isGBB','isLeptonicB','isLeptonicB_C','isC','isGCC','isCC','isUD','isS','isG']
@@ -18,7 +19,6 @@ def __init__(self):
         self.weightbranchY='jet_eta'
         self.remove = True
         self.referenceclass='isB'
-        
         self.weight_binX = np.array([
             10,25,30,35,40,45,50,60,75,100,
             125,150,175,200,250,300,400,500,
@@ -126,27 +126,29 @@ def convertFromSourceFile(self, filename, weighterobjects, istraining):
         from DeepJetCore.stopwatch import stopwatch
         sw=stopwatch()
         swall=stopwatch()
+        if not istraining:
+            self.remove = False
         
         def reduceTruth(uproot_arrays):
             
-            b = uproot_arrays['isB']
+            b = uproot_arrays[b'isB']
             
-            bb = uproot_arrays['isBB']
-            gbb = uproot_arrays['isGBB']
+            bb = uproot_arrays[b'isBB']
+            gbb = uproot_arrays[b'isGBB']
             
-            bl = uproot_arrays['isLeptonicB']
-            blc = uproot_arrays['isLeptonicB_C']
+            bl = uproot_arrays[b'isLeptonicB']
+            blc = uproot_arrays[b'isLeptonicB_C']
             lepb = bl+blc
             
-            c = uproot_arrays['isC']
-            cc = uproot_arrays['isCC']
-            gcc = uproot_arrays['isGCC']
+            c = uproot_arrays[b'isC']
+            cc = uproot_arrays[b'isCC']
+            gcc = uproot_arrays[b'isGCC']
             
-            ud = uproot_arrays['isUD']
-            s = uproot_arrays['isS']
+            ud = uproot_arrays[b'isUD']
+            s = uproot_arrays[b'isS']
             uds = ud+s
             
-            g = uproot_arrays['isG']
+            g = uproot_arrays[b'isG']
             
             return np.vstack((b,bb+gbb,lepb,c+cc+gcc,uds,g)).transpose()
         
@@ -205,7 +207,6 @@ def reduceTruth(uproot_arrays):
                 stop = None,
                 branches = b
             )
-            print weighterobjects
             notremoves=weighterobjects['weigther'].createNotRemoveIndices(for_remove)
             undef=for_remove['isUndefined']
             notremoves-=undef
@@ -240,3 +241,263 @@ def writeOutPrediction(self, predicted, features, truth, weights, outfilename, i
         out = np.core.records.fromarrays(np.vstack( (predicted[0].transpose(),truth[0].transpose(), features[0][:,0:2].transpose() ) ),
                                          names='prob_isB, prob_isBB,prob_isLeptB, prob_isC,prob_isUDS,prob_isG,isB, isBB, isLeptB, isC,isUDS,isG,jet_pt, jet_eta')
         array2root(out, outfilename, 'tree')
+
+
+
+class TrainData_DeepCSV(TrainData):
+    def __init__(self):
+
+        TrainData.__init__(self)
+
+        self.description = "DeepCSV training datastructure"
+       
+        self.truth_branches = ['isB','isBB','isGBB','isLeptonicB','isLeptonicB_C','isC','isGCC','isCC','isUD','isS','isG']
+        self.undefTruth=['isUndefined']
+        self.weightbranchX='jet_pt'
+        self.weightbranchY='jet_eta'
+        self.remove = True
+        self.referenceclass='isB'
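+        # the pt (GeV) x eta bins below are used by the Weighter to flatten the
+        # kinematic spectra of all classes against the 'isB' reference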
+        self.weight_binX = np.array([
+            10,25,30,35,40,45,50,60,75,100,
+            125,150,175,200,250,300,400,500,
+            600,2000],dtype=float)
+        
+        self.weight_binY = np.array(
+            [-2.5,-2.,-1.5,-1.,-0.5,0.5,1,1.5,2.,2.5],
+            dtype=float
+        )
+
+        self.global_branches = ['jet_pt', 'jet_eta',
+                                'TagVarCSV_trackSumJetEtRatio',
+                                'TagVarCSV_trackSumJetDeltaR',
+                                'TagVarCSV_vertexCategory',
+                                'TagVarCSV_trackSip2dValAboveCharm',
+                                'TagVarCSV_trackSip2dSigAboveCharm',
+                                'TagVarCSV_trackSip3dValAboveCharm',
+                                'TagVarCSV_trackSip3dSigAboveCharm',
+                                'TagVarCSV_jetNSelectedTracks',
+                                'TagVarCSV_jetNTracksEtaRel']
+
+        self.track_branches = ['TagVarCSVTrk_trackJetDistVal',
+                               'TagVarCSVTrk_trackPtRel',
+                               'TagVarCSVTrk_trackDeltaR',
+                               'TagVarCSVTrk_trackPtRatio',
+                               'TagVarCSVTrk_trackSip3dSig',
+                               'TagVarCSVTrk_trackSip2dSig',
+                               'TagVarCSVTrk_trackDecayLenVal']
+        self.n_track = 6
+        
+        self.eta_rel_branches = ['TagVarCSV_trackEtaRel']
+        self.n_eta_rel = 4
+
+        self.vtx_branches = ['TagVarCSV_vertexMass',
+                             'TagVarCSV_vertexNTracks',
+                             'TagVarCSV_vertexEnergyRatio',
+                             'TagVarCSV_vertexJetDeltaR',
+                             'TagVarCSV_flightDistance2dVal',
+                             'TagVarCSV_flightDistance2dSig',
+                             'TagVarCSV_flightDistance3dVal',
+                             'TagVarCSV_flightDistance3dSig']
+        self.n_vtx = 1
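+        # n_track / n_eta_rel / n_vtx set how many candidates per jet are kept;
+        # shorter lists are zero-padded (see MeanNormZeroPad in convertFromSourceFile)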
+                
+        self.reduced_truth = ['isB','isBB','isC','isUDSG']
+
+    def readTreeFromRootToTuple(self, filenames, limit=None, branches=None):
+        '''
+        To be used to get the initial tuple for further processing in inheriting classes.
+        Makes sure the number of entries is properly set.
+        
+        Can also read a list of files, e.g. to produce weights/removes from larger statistics
+        (not fully tested yet).
+        '''
+        
+        if branches is None or len(branches) == 0:
+            return np.array([],dtype='float32')
+            
+        #print(branches)
+        #remove duplicates
+        usebranches=list(set(branches))
+        tmpbb=[]
+        for b in usebranches:
+            if len(b):
+                tmpbb.append(b)
+        usebranches=tmpbb
+            
+        import ROOT
+        from root_numpy import tree2array, root2array
+        if isinstance(filenames, list):
+            for f in filenames:
+                fileTimeOut(f,120)
+            print('add files')
+            nparray = root2array(
+                filenames, 
+                treename = "deepntuplizer/tree", 
+                stop = limit,
+                branches = usebranches
+                )
+            print('done add files')
+            return nparray
+        else:    
+            fileTimeOut(filenames,120) #give eos a minute to recover
+            rfile = ROOT.TFile(filenames)
+            tree = rfile.Get(self.treename)
+            if not self.nsamples:
+                self.nsamples=tree.GetEntries()
+            nparray = tree2array(tree, stop=limit, branches=usebranches)
+            return nparray
+        
+    def createWeighterObjects(self, allsourcefiles):
+        # calculates the weights needed for flattening the pt/eta spectrum
+        
+        from DeepJetCore.Weighter import Weighter
+        weighter = Weighter()
+        weighter.undefTruth = self.undefTruth
+        branches = [self.weightbranchX,self.weightbranchY]
+        branches.extend(self.truth_branches)
+
+        if self.remove:
+            weighter.setBinningAndClasses(
+                [self.weight_binX,self.weight_binY],
+                self.weightbranchX,self.weightbranchY,
+                self.truth_branches
+            )
+
+        
+        counter=0
+        import ROOT
+        from root_numpy import tree2array, root2array
+        if self.remove:
+            for fname in allsourcefiles:
+                fileTimeOut(fname, 120)
+                nparray = root2array(
+                    fname,
+                    treename = "deepntuplizer/tree",
+                    stop = None,
+                    branches = branches
+                )
+                weighter.addDistributions(nparray)
+                #del nparray
+                counter=counter+1
+            # build the remove probabilities once, after all distributions have been accumulated
+            weighter.createRemoveProbabilitiesAndWeights(self.referenceclass)
+
+        print("calculate means")
+        from DeepJetCore.preprocessing import meanNormProd
+        nparray = self.readTreeFromRootToTuple(allsourcefiles,branches=self.vtx_branches+self.eta_rel_branches+self.track_branches+self.global_branches, limit=500000)
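+        # zero out very large (>= 1e5) default/placeholder values before computing the means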
+        for a in (self.vtx_branches+self.eta_rel_branches+self.track_branches+self.global_branches):
+            for b in range(len(nparray[a])):
+                nparray[a][b] = np.where(nparray[a][b] < 100000.0, nparray[a][b], 0)
+        means = np.array([],dtype='float32')
+        if len(nparray):
+            means = meanNormProd(nparray)
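+        # note: 'weigther' (sic) is kept as the key name because convertFromSourceFile
+        # looks the object up under exactly this spelling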
+        return {'weigther':weighter,'means':means}
+    
+    def convertFromSourceFile(self, filename, weighterobjects, istraining):
+
+        # Function to produce the numpy training arrays from root files
+
+        from DeepJetCore.Weighter import Weighter
+        from DeepJetCore.stopwatch import stopwatch
+        sw=stopwatch()
+        swall=stopwatch()
+        if not istraining:
+            self.remove = False
+                
+        def reduceTruth(uproot_arrays):
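+            # DeepCSV collapses the fine-grained truth into four classes:
+            #   B    = b + leptonic b decays
+            #   BB   = bb + gluon splitting to bb
+            #   C    = c + cc + gluon splitting to cc
+            #   UDSG = light quarks + gluons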
+            
+            b = uproot_arrays[b'isB']
+            
+            bb = uproot_arrays[b'isBB']
+            gbb = uproot_arrays[b'isGBB']
+            
+            bl = uproot_arrays[b'isLeptonicB']
+            blc = uproot_arrays[b'isLeptonicB_C']
+            lepb = bl+blc
+            
+            c = uproot_arrays[b'isC']
+            cc = uproot_arrays[b'isCC']
+            gcc = uproot_arrays[b'isGCC']
+            
+            ud = uproot_arrays[b'isUD']
+            s = uproot_arrays[b'isS']
+            uds = ud+s
+            
+            g = uproot_arrays[b'isG']
+            
+            return np.vstack((b+lepb,bb+gbb,c+cc+gcc,uds+g)).transpose()
+        
+        print('reading '+filename)
+        
+        import ROOT
+        from root_numpy import tree2array, root2array
+        fileTimeOut(filename,120) #give eos a minute to recover
+        rfile = ROOT.TFile(filename)
+        tree = rfile.Get("deepntuplizer/tree")
+        self.nsamples = tree.GetEntries()  
+        # flatten, zero-pad and mean-normalise the input branches into a single feature array
+        from DeepJetCore.preprocessing import MeanNormZeroPad,MeanNormZeroPadParticles
+        x_global = MeanNormZeroPad(filename,weighterobjects['means'],
+                                   [self.global_branches,self.track_branches,self.eta_rel_branches,self.vtx_branches],
+                                   [1,self.n_track,self.n_eta_rel,self.n_vtx],self.nsamples)
+                
+        import uproot
+        urfile = uproot.open(filename)["deepntuplizer/tree"]
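+        # uproot (v3) returns branch names as byte strings, hence the b'...' keys in reduceTruth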
+        truth_arrays = urfile.arrays(self.truth_branches)
+        truth = reduceTruth(truth_arrays)
+        truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type!
+
+        x_global = x_global.astype(dtype='float32', order='C')
+        
+        
+        if self.remove:
+            b = [self.weightbranchX,self.weightbranchY]
+            b.extend(self.truth_branches)
+            b.extend(self.undefTruth)
+            fileTimeOut(filename, 120)
+            for_remove = root2array(
+                filename,
+                treename = "deepntuplizer/tree",
+                stop = None,
+                branches = b
+            )
+            notremoves=weighterobjects['weigther'].createNotRemoveIndices(for_remove)
+            undef=for_remove['isUndefined']
+            notremoves-=undef
+            print('took ', sw.getAndReset(), ' to create remove indices')
+
+
+        if self.remove:
+            print('remove')
+            x_global=x_global[notremoves > 0]
+            truth=truth[notremoves > 0]
+
+        newnsamp=x_global.shape[0]
+        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
+
+        
+        print('remove nans')
+        x_global = np.where(np.isfinite(x_global), x_global, 0)
+        
+        return [x_global], [truth], []
+    
+    ## defines how to write out the prediction
+    def writeOutPrediction(self, predicted, features, truth, weights, outfilename, inputfile):
+        # predicted will be a list
+        
+        from root_numpy import array2root
+        out = np.core.records.fromarrays(np.vstack( (predicted[0].transpose(),truth[0].transpose(), features[0][:,0:2].transpose() ) ),
+                                         names='prob_isB, prob_isBB, prob_isC,prob_isUDSG,isB, isBB, isC,isUDSG,jet_pt, jet_eta')
+        array2root(out, outfilename, 'tree')
diff --git a/modules/models/convolutional.py b/modules/models/convolutional.py
index e89ee0f..84e60cf 100644
--- a/modules/models/convolutional.py
+++ b/modules/models/convolutional.py
@@ -56,3 +56,38 @@ def model_deepFlavourReference(Inputs,dropoutRate=0.1,momentum=0.6):
     return model
 
 
+def model_deepCSV(Inputs,dropoutRate=0.1):
+    """
+    reference 1x1 convolutional model for 'deepFlavour'
+    with recurrent layers and batch normalisation
+    standard dropout rate it 0.1
+    should be trained for flavour prediction first. afterwards, all layers can be fixed
+    that do not include 'regression' and the training can be repeated focusing on the regression part
+    (check function fixLayersContaining with invert=True)
+    """  
+    globalvars = Inputs[0]
+
+    x = Dense(100, activation='relu',kernel_initializer='lecun_uniform')(globalvars)
+    x = Dropout(dropoutRate)(x)
+    x = Dense(100, activation='relu',kernel_initializer='lecun_uniform')(x)
+    x = Dropout(dropoutRate)(x)
+    x = Dense(100, activation='relu',kernel_initializer='lecun_uniform')(x)
+    x = Dropout(dropoutRate)(x)
+    x = Dense(100, activation='relu',kernel_initializer='lecun_uniform')(x)
+    x = Dropout(dropoutRate)(x)
+    x = Dense(100, activation='relu',kernel_initializer='lecun_uniform')(x)
+
+    predictions = Dense(4, activation='softmax',kernel_initializer='lecun_uniform')(x)
+    
+    model = Model(inputs=Inputs, outputs=predictions)
+    return model
+
+
+
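+# Minimal standalone sketch (hypothetical usage; in this repo, training_base
+# builds the Input layers from the TrainData shapes automatically).
+# With TrainData_DeepCSV, the flat feature vector has
+# 11 global + 6*7 track + 4 eta-rel + 1*8 vertex = 65 entries:
+#
+#   from keras.layers import Input
+#   inputs = [Input(shape=(65,))]
+#   model = model_deepCSV(inputs, dropoutRate=0.1)
+#   model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])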
diff --git a/modules/scripts/plot_roc.py b/modules/scripts/plot_roc.py
new file mode 100644
index 0000000..c40d1b2
--- /dev/null
+++ b/modules/scripts/plot_roc.py
@@ -0,0 +1,81 @@
+print("start import")
+from sklearn.metrics import roc_curve
+from scipy.interpolate import InterpolatedUnivariateSpline
+import pandas as pd
+import numpy as np
+import root_numpy
+import ROOT
+from ROOT import TGraph
+print("finish import")
+
+
+def spit_out_roc(disc, truth_array, selection_array):
+    # compute a ROC curve and resample it onto a fixed logarithmic grid of
+    # mistag rates, so curves from different trainings line up point by point
+    newx = np.logspace(-3.5, 0, 100)
+    truth = truth_array[selection_array]*1
+    disc = disc[selection_array]
+    tmp_fpr, tmp_tpr, _ = roc_curve(truth, disc)
+    coords = pd.DataFrame()
+    coords['fpr'] = tmp_fpr
+    coords['tpr'] = tmp_tpr
+    clean = coords.drop_duplicates(subset=['fpr'])
+    # linear (k=1) spline through the measured points, evaluated on the grid
+    spline = InterpolatedUnivariateSpline(clean.fpr, clean.tpr, k=1)
+    tprs = spline(newx)
+    return tprs, newx
+
+
+
+isDeepJet = False
+if isDeepJet:
+    listbranch = ['prob_isB', 'prob_isBB','prob_isLeptB', 'prob_isC','prob_isUDS','prob_isG','isB', 'isBB', 'isLeptB', 'isC','isUDS','isG','jet_pt', 'jet_eta']
+else:
+    listbranch = ['prob_isB', 'prob_isBB', 'prob_isC','prob_isUDSG','isB', 'isBB', 'isC','isUDSG','jet_pt', 'jet_eta']
+
+dirz = '/data/ml/ebols/DeepCSV_PredictionsTest_2/'
+truthfile = open( dirz+'outfiles.txt','r')
+print("opened text file")
+rfile1 = ROOT.TChain("tree")
+count = 0
+
+for line in truthfile:
+    count += 1
+    if len(line.strip()) < 1: continue
+    file1name = dirz + line.strip()
+    rfile1.Add(file1name)
+
+print("added files")
+df = root_numpy.tree2array(rfile1, branches = listbranch)
+print("converted to root")
+
+if isDeepJet:
+    b_jets = df['isB']+df['isBB']+df['isLeptB']
+    disc = df['prob_isB']+df['prob_isBB']+df['prob_isLeptB']
+    summed_truth = df['isB']+df['isBB']+df['isLeptB']+df['isC']+df['isUDS']+df['isG']
+    veto_c = (df['isC'] != 1) & ( df['jet_pt'] > 30) & (summed_truth != 0)
+    veto_udsg = (df['isUDS'] != 1) & (df['isG'] != 1) & ( df['jet_pt'] > 30) & (summed_truth != 0)
+else:
+    b_jets = df['isB']+df['isBB']
+    disc = df['prob_isB']+df['prob_isBB']
+    summed_truth = df['isB']+df['isBB']+df['isC']+df['isUDSG']
+    veto_c = (df['isC'] != 1) & (summed_truth != 0)
+    veto_udsg = (df['isUDSG'] != 1) & (summed_truth != 0)
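+
+# veto_c keeps b and light jets (b efficiency vs. light mistag rate);
+# veto_udsg keeps b and c jets (b efficiency vs. c mistag rate)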
+
+
+f = ROOT.TFile("ROCS_DeepCSV.root", "recreate")
+
+x1, y1 = spit_out_roc(disc,b_jets,veto_c)
+x2, y2 = spit_out_roc(disc,b_jets,veto_udsg)
+
+# TGraph(n, x, y): x = b-jet efficiency, y = mistag rate on the fixed grid
+gr1 = TGraph( 100, x1, y1 )
+gr1.SetName("roccurve_0")
+gr2 = TGraph( 100, x2, y2 )
+gr2.SetName("roccurve_1")
+gr1.Write()
+gr2.Write()
+f.Write()
+f.Close()