diff --git a/.github/workflows/integration-testing.yml b/.github/workflows/integration-testing.yml new file mode 100644 index 0000000..a9accb2 --- /dev/null +++ b/.github/workflows/integration-testing.yml @@ -0,0 +1,58 @@ +name: Integration tests V4 + +on: + pull_request: + branches: + - main + +jobs: + container_job: + runs-on: ubuntu-latest + + services: + mongo: + image: mongo + ports: + - 27107:27107 + scicat-backend: + image: ghcr.io/scicatproject/scicat-backend-next:stable + ports: + - 3000:3000 + env: + MONGODB_URI: mongodb://mongo:27017/scicat + EXPRESS_SESSION_SECRET: "${EXPRESS_SESSION_SECRET}" + JWT_SECRET: "${JWT_SECRET}" + PORT: 3000 + HTTP_MAX_REDIRECTS: 5 + HTTP_TIMEOUT: 5000 + JWT_EXPIRES_IN: 3600 + SITE: SAMPLE-SITE + PID_PREFIX: PID.SAMPLE.PREFIX + DOI_PREFIX: DOI.SAMPLE.PREFIX + METADATA_KEYS_RETURN_LIMIT: 100 + METADATA_PARENT_INSTANCES_RETURN_LIMIT: 100 + ADMIN_GROUPS: admin,ingestor + + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - run: source continuous_integration/scripts/install.sh + + + - run: | + set -vxeuo pipefail + python -m pip install . + python -m pip install .[dev] + python -m pip list + + - run: | + set -vxeuo pipefail + coverage run -m pytest tests/tests_integration/tests_integration.py + coverage report + env: + BASE_URL: http://localhost:3000/api/v3 + SCICAT_USER: ingestor + SCICAT_PASSWORD: aman \ No newline at end of file diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml deleted file mode 100644 index 392e281..0000000 --- a/.github/workflows/linting.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: pre-commit - -on: - pull_request: - push: - branches: [main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.3 \ No newline at end of file diff --git a/.github/workflows/publish-documentation.yml b/.github/workflows/publish-documentation.yml index 78f4544..5a1571f 100644 --- a/.github/workflows/publish-documentation.yml +++ b/.github/workflows/publish-documentation.yml @@ -37,7 +37,7 @@ jobs: shell: bash -l {0} run: | set -vxeuo pipefail - python -m pip install -r requirements-dev.txt + python -m pip install .[dev] python -m pip list - name: Build Docs shell: bash -l {0} diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 5b95152..b71b1be 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -7,6 +7,7 @@ on: - cron: '00 4 * * *' # daily at 4AM jobs: + build: runs-on: ubuntu-latest @@ -31,11 +32,19 @@ jobs: shell: bash -l {0} run: | set -vxeuo pipefail - python -m pip install -r requirements-dev.txt + python -m pip install .[dev] + python -m pip install .[hdf5] python -m pip list + + - name: Lint with flake8 + shell: bash -l {0} + run: | + set -vxeuo pipefail + python -m flake8 + - name: Test with pytest shell: bash -l {0} run: | set -vxeuo pipefail - coverage run -m pytest -v + coverage run -m pytest --ignore tests_integration -v coverage report diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 0baa145..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,15 +0,0 @@ -default_language_version: - python: python3 -repos: - - repo: https://github.com/ambv/black - rev: 21.12b0 - hooks: - - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.0.0 - hooks: - - id: flake8 - - repo: https://github.com/kynan/nbstripout - rev: 0.5.0 - 
hooks: - - id: nbstripout diff --git a/docs/pyscicatlogo.png b/docs/pyscicatlogo.png new file mode 100644 index 0000000..d6b31af Binary files /dev/null and b/docs/pyscicatlogo.png differ diff --git a/docs/pyscicatlogo.svg b/docs/pyscicatlogo.svg new file mode 100644 index 0000000..30f3c3a --- /dev/null +++ b/docs/pyscicatlogo.svg @@ -0,0 +1,164 @@ + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/conf.py b/docs/source/conf.py index 48a8171..28b3c36 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -90,7 +90,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/source/howto/ingest.md b/docs/source/howto/ingest.md index 05cf37d..a694ed0 100644 --- a/docs/source/howto/ingest.md +++ b/docs/source/howto/ingest.md @@ -13,6 +13,7 @@ from pyscicat.model import ( Datablock, DataFile, Dataset, + Sample, Ownable ) @@ -61,6 +62,19 @@ Note that we store the provided dataset_id in a variable for later use. Also note the `sourceFolder`. This is a folder on the file system that SciCat has access to, and will contain the files for this `Dataset`. +Proposals and instruments have to be created by an administrator. A sample with `sampleId="gargleblaster"` can be created like this: +```python +sample = Sample( + sampleId="gargleblaster", + owner="Chamber of Commerce", + description="A legendary drink.", + sampleCharacteristics={"Flavour": "Unknown, but potent"}, + isPublished=False, + **ownable.dict() +) +sample_id = client.upload_sample(sample) # sample_id == "gargleblaster" +``` + ## Upload a Datablock ```python diff --git a/docs/source/howto/ingestion_simulation_dataset_ess.md b/docs/source/howto/ingestion_simulation_dataset_ess.md new file mode 100644 index 0000000..5491b60 --- /dev/null +++ b/docs/source/howto/ingestion_simulation_dataset_ess.md @@ -0,0 +1,248 @@ +# Ingest Simulation Dataset at ESS +During the design and commissioning of the ESS, many simulation datasets have been produced in the search for the best design and to validate it. +At ESS, we have decided to import such datasets into our SciCat instance to facilitate search, quickly assess the cumulative quality of the collected results and be able to start applying Machine Learning techniques to such data in the near future. + +## Background +Data scientists and modellers at ESS have produced many simulations, each one including multiple variations of the same design to explore its running parameters. +The process of ingesting all this information into SciCat will produce around a thousand new datasets. +To facilitate testing and validation of the information at each step of the process, data curators have decided to break down the process into multiple scripts which cumulatively collect all the information needed to create a meaningful entry in SciCat. +The process produces one JSON file containing the basic information, metadata and files associated with one dataset. +The last step is to read such a file and ingest it into SciCat. +The rest of this document covers all the code used to load the dataset information, create the matching models and create a new dataset and orig datablock in SciCat.
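+Before walking through the ingestion script, it can be useful to verify that a prepared file really contains the three sections described in the next section (dataset, orig_datablock and ownable). The snippet below is a minimal sketch and is not part of the ingestion script; the `check_dataset_file` helper and the glob pattern are illustrative and assume the example files shipped under `examples/data`.
+```python
+import json
+from pathlib import Path
+
+
+def check_dataset_file(path):
+    # a prepared entry is expected to carry the dataset body, the file list
+    # and the ownership information used by the ingestion steps below
+    with open(path, "r") as fh:
+        entry = json.load(fh)
+    missing = [key for key in ("dataset", "orig_datablock", "ownable") if key not in entry]
+    if missing:
+        raise ValueError(f"{path}: missing sections {missing}")
+    return entry
+
+
+# check the example files added alongside this document
+for dataset_file in sorted(Path("examples/data").glob("ingestion_simulation_dataset_ess_*_dataset.json")):
+    check_dataset_file(dataset_file)
+```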
+ +## Individual Dataset entry +Each dataset is prepared for ingestion and saved in an individual JSON file. +The example JSON file is available under the examples/data folder and has the following structure: + +```json +{ + "id": "0275d813-be6b-444f-812f-b8311d129361", + "dataset": { + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "principalInvestigator": "Max Novelli", + "creationLocation": "DMSC", + "owner": "Massimiliano Novelli", + "ownerEmail": "max.novelli@ess.eu", + "contactEmail": "max.novelli@ess.eu", + "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", + "creationTime": "2022-03-07T15:44:59.000Z", + "type": "raw", + "techniques": [ + { + "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", + "name": "Simulation" + } + ], + "size": 68386784, + "instrumentId": "", + "sampleId": "", + "proposalId": "", + "scientificMetadata": { + "sample_width": { + "value": 0.015, + "unit": "m" + }, + "sample_height": { + "value": 0.015, + "unit": "m" + }, + "divergence_requirement_horizontal": { + "value": 0.75, + "unit": "deg" + }, + "omissed" : { + "notes" : "Additional scientific metadata has been omitted for readability" + } + } + }, + "orig_datablock": { + "size": 68386784, + "ownerGroup": "ess", + "accessGroups": ["dmsc", "swap"], + "dataFileList": [ + { + "path": "launch_all.sh", + "size": 10171, + "time": "2014-01-23T19:52:37.000Z" + }, { + "path": "suggested_reruns-fails.sh", + "size": 448, + "time": "2014-01-23T19:53:04.000Z" + }, { + "notes" : "Additional file entries have been omitted for readability" + } + ] + }, + "ownable": { + "ownerGroup": "ess", + "accessGroups": ["dmsc"] + } +} + +``` +As you can see, the file is already structured around the three main components of the dataset: +- the main dataset body with its scientific metadata +- the ownable object +- the orig datablock containing all the files associated with the dataset + +The three sections make for simpler ingestion code. + +## Script +The script that ingests the dataset mentioned above is available in the examples folder under the name `ingestion_simulation_dataset_ess.py`. +In this section, we walk through the code of this script to illustrate the various functionalities. + + +### Overall description +The ingestion is organized in simple sections by leveraging the dataset information, which is already structured to match the operations required to create a full dataset in SciCat. +To simplify the script, it is assumed that pyscicat is installed system-wide and that the script is run from the folder where it is saved. All file paths are relative to the script folder. +At the beginning of the script, libraries are imported and we define the paths to the relevant JSON files.
+ +```python +# libraries +import json +import pyscicat.client as pyScClient +import pyscicat.model as pyScModel + + +# scicat configuration file +# includes scicat instance URL +# scicat user and password +scicat_configuration_file = "./data/ingestion_simulation_dataset_ess_config.json" +simulation_dataset_file = "./data/ingestion_simulation_dataset_ess_dataset.json" +``` + + +### Loading relevant information +In the next section, the script loads the configuration needed to communicate with SciCat and the dataset information. + +```python +# loads scicat configuration +with open(scicat_configuration_file, "r") as fh: + scicat_config = json.load(fh) + + +# loads simulation information from matching json file +with open(simulation_dataset_file, "r") as fh: + dataset_information = json.load(fh) +``` + + +### Authentication +Here, we instantiate the pyscicat client and perform the login. + +```python +scClient = pyScClient.ScicatClient( + base_url=scicat_config['scicat']['host'], + username=scicat_config['scicat']['username'], + password=scicat_config['scicat']['password'] +) +``` + + +### Create Ownable model +We then instantiate the ownable object, which is used to assign the correct owner and access groups to all the other SciCat entries that we are going to create. + +```python +ownable = pyScModel.Ownable( + **dataset_information['ownable'] +) +``` + +This notation is equivalent to passing in all the ownable object properties explicitly: +```python +ownable = pyScModel.Ownable( + ownerGroup=dataset_information['ownable']['ownerGroup'], + accessGroups=dataset_information['ownable']['accessGroups'] +) +``` + + +### Create Dataset model +As the next step, we need to instantiate a raw dataset object as defined in the pySciCat models. +Make sure to select the correct dataset type: raw or derived. In our case, we are creating a raw one, as specified in the dataset JSON file. +```python +dataset = pyScModel.RawDataset( + **dataset_information['dataset'], + **ownable.dict() +) +``` + +As highlighted in the previous section, this notation is equivalent to assigning all the model properties explicitly: +```python +dataset = pyScModel.RawDataset( + datasetName=dataset_information['dataset']['datasetName'], + description=dataset_information['dataset']['description'], + creationLocation=dataset_information['dataset']['creationLocation'], + principalInvestigator=dataset_information['dataset']['principalInvestigator'], + owner=dataset_information['dataset']['owner'], + ownerEmail=dataset_information['dataset']['ownerEmail'], + ... omitted ... + ownerGroup=dataset_information['ownable']['ownerGroup'], + accessGroups=dataset_information['ownable']['accessGroups'] +) +``` + + +### Submit Dataset to SciCat +We are now ready to post to SciCat and create a Dataset. + +```python +created_dataset = scClient.upload_new_dataset(dataset) +``` + +If the request is successful, the variable created_dataset contains the same information present in dataset, plus an additional field named _pid_ which contains the official pid assigned to this dataset by SciCat. + + +### Create OrigDatablock model +Now that we have created the dataset, we add the list of files related to this dataset. +As we have done with the other objects, we leverage the pySciCat model to make sure that the information is properly validated. +In this snippet of code, we use explicit notation for the main object, and the expansion for the inner file model.
+ +```python +origDataBlock = pyScModel.OrigDatablock( + size=dataset_information['orig_datablock']['size'], + datasetId=created_dataset['pid'], + dataFileList=[ + pyScModel.DataFile( + **file + ) + for file + in dataset_information['orig_datablock']['dataFileList'] + ], + **ownable.dict() +) +``` + +As highlighted before, this code is equivalent to: +```python +origDataBlock = pyScModel.OrigDatablock( + size=dataset_information['orig_datablock']['size'], + datasetId=created_dataset['pid'], + dataFileList=[ + pyScModel.DataFile( + path=file['path'], + size=file['size'], + time=file['time'] + ) + for file + in dataset_information['orig_datablock']['dataFileList'] + ], + ownerGroup=dataset_information['ownable']['ownerGroup'], + accessGroups=dataset_information['ownable']['accessGroups'] +) +``` + +### Submit OrigDatablock +With the original datablock object created, it is time to submit the request to SciCat. + +```python +created_orig_datablock = scClient.upload_dataset_origdatablock(origDataBlock) +``` + +Similarly to the dataset creation function, this call returns the same information provided as an argument, with the addition of the pid assigned to the entry by SciCat. + + +## Validate the dataset +At this point, you can visit your instance of SciCat and you should see the dataset that we just created in the list of datasets. The file list can be viewed by visiting the _Datafiles_ tab on the dataset details page. + diff --git a/docs/source/index.md b/docs/source/index.md index c32ff5a..1fae2e9 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -17,6 +17,7 @@ installation ```{toctree} :caption: How To Guides howto/ingest +howto/ingestion_simulation_dataset_ess ``` diff --git a/examples/data/ingestion_simulation_dataset_ess_config.json b/examples/data/ingestion_simulation_dataset_ess_config.json new file mode 100644 index 0000000..6b49bdd --- /dev/null +++ b/examples/data/ingestion_simulation_dataset_ess_config.json @@ -0,0 +1,7 @@ +{ + "scicat" : { + "host": "", + "username": "ingestor", + "password": "" + } +} diff --git a/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json b/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json new file mode 100644 index 0000000..53c18b4 --- /dev/null +++ b/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json @@ -0,0 +1,256 @@ +{ + "id": "9be3bd96-e256-11ec-bd08-f32122965a87", + "dataset": { + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE derived", + "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "investigator": "Max Novelli", + "inputDatasets" : ["0275d813-be6b-444f-812f-b8311d129361"], + "usedSoftware" : ["python","My software"], + "jobParameters" : { + "parameter-1" : "value-1", + "parameter-2" : "value-2" + }, + "jobLogData" : "Some jebrish about the dataset", + "owner": "Massimiliano Novelli", + "ownerEmail": "max.novelli@ess.eu", + "contactEmail": "max.novelli@ess.eu", + "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", + "creationTime": "2022-03-07T15:44:59.000Z", + "type": "derived", + "scientificMetadata": { + "sample_width": { "value": 0.015, "unit": "m" }, + "sample_height": { "value": 0.015, "unit": "m" }, + "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, + "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, + "guide_sample_distance": { "value": 0.6, "unit": "m" }, + "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, + "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, +
"moderator_width": { "value": 0.12, "unit": "m" }, + "moderator_height": { "value": 0.03, "unit": "m" }, + "moderator_sample_distance": { "value": 170, "unit": "m" }, + "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, + "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, + "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, + "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, + "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, + "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, + "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, + "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, + "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, + "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, + "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, + "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, + "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, + "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, + "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, + "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, + "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, + "optimization_name": { "value": "PGESKSE", "unit": "" }, + "configuration_summary": { "value": "PGESKSE", "unit": "" }, + "best_figure_of_merit": { "value": "0.25293", "unit": "" }, + "brilliance_transfer": { "value": "0.47344", "unit": "" }, + "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "number_of_parameters": { "value": 2, "unit": "" }, + "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, + "event_writen_present": { "value": true, "unit": "" }, + "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, + "event_done_present": { "value": true, "unit": "" }, + "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, + "event_analysis_present": { "value": true, "unit": "" }, + "event_analysis_file": { "value": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, + "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, + "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, + "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, + "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, + "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, + "Hsize": { "value": 4, "unit": "cm" }, + "moderator_size_y": { "value": 3, "unit": "m" } + }, + "techniques": [ + { + "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", + "name": "Simulation" + } + ], + "size": 68386784, + "instrumentId": "" + }, + "orig_datablock": { + "size": 68386784, + "dataFileList": [ + { + "path": "launch_all.sh", + "size": 10171, + "time": 
"2014-01-23T19:52:37.000Z" + }, + { + "path": "suggested_reruns-fails.sh", + "size": 448, + "time": "2014-01-23T19:53:04.000Z" + }, + { + "path": "compile_all_py.sh", + "size": 273, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "clean3.sh", + "size": 354, + "time": "2014-01-25T10:44:54.000Z" + }, + { + "path": "master_record-done_4Hsize_3moderator_size_y.txt", + "size": 579, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "master_record-writen_4Hsize_3moderator_size_y.txt", + "size": 561, + "time": "2014-01-23T19:52:38.000Z" + }, + { + "path": "compile_all.sh", + "size": 259, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "output/brill_ref/brilliance_ref_4Hsize_3moderator_size_y.mat", + "size": 11624010, + "time": "2014-01-24T07:56:45.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_acceptance_ess.png", + "size": 521132, + "time": "2014-01-27T11:38:06.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_acceptance_pure.png", + "size": 518423, + "time": "2014-01-27T11:37:52.000Z" + }, + { + "path": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", + "size": 587, + "time": "2014-01-28T14:03:02.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_overall_pure.png", + "size": 144605, + "time": "2014-01-27T11:37:49.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_posdiv_ess.png", + "size": 336496, + "time": "2014-01-27T11:38:04.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y_all.mat", + "size": 34321077, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_overall_ess.png", + "size": 127660, + "time": "2014-01-27T11:38:02.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_geometry.dat", + "size": 2175, + "time": "2014-01-25T00:23:10.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y_ifit_analyse.m", + "size": 19482, + "time": "2014-01-23T19:52:40.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_geometry.png", + "size": 76259, + "time": "2014-01-27T11:38:09.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_posdiv_pure.png", + "size": 353828, + "time": "2014-01-27T11:37:50.000Z" + }, + { + "path": "brilliance_refference/brilliance_ifit_4Hsize_3moderator_size_y.m", + "size": 3048, + "time": "2014-01-23T19:52:33.000Z" + }, + { + "path": "brilliance_refference/brilliance_4Hsize_3moderator_size_y1.mat", + "size": 11626979, + "time": "2014-01-24T07:56:42.000Z" + }, + { + "path": "brilliance_refference/brilliance_4Hsize_3moderator_size_y.batch", + "size": 671, + "time": "2014-01-23T19:52:32.000Z" + }, + { + "path": "brilliance_refference/input_used_4Hsize_3moderator_size_y.txt", + "size": 358, + "time": "2014-01-23T19:52:35.000Z" + }, + { + "path": "brilliance_refference/run_brilliance_ifit_4Hsize_3moderator_size_y.m", + "size": 53, + "time": "2014-01-23T19:52:36.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y.batch", + "size": 734, + "time": "2014-01-23T19:52:48.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y_ifit.m", + "size": 11101, + "time": "2014-01-23T19:52:48.000Z" + }, + { + "path": "PGESKSE/err_PGESKSE_4Hsize_3moderator_size_y.txt", + "size": 0, + "time": "2014-01-24T21:13:29.000Z" + }, + { + "path": "PGESKSE/run_PGESKSE_4Hsize_3moderator_size_y_ifit.m", + "size": 50, + "time": "2014-01-23T19:52:51.000Z" + }, + { + "path": 
"PGESKSE/out_PGESKSE_4Hsize_3moderator_size_y.txt", + "size": 8681220, + "time": "2014-01-25T00:35:58.000Z" + }, + { + "path": "PGESKSE/compile_PGESKSE_py.sh", + "size": 558, + "time": "2014-01-23T19:52:45.000Z" + }, + { + "path": "PGESKSE/compile_PGESKSE.sh", + "size": 540, + "time": "2014-01-23T19:52:45.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y1.par", + "size": 918, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y1_geometry.dat", + "size": 2175, + "time": "2014-01-25T00:23:10.000Z" + } + ] + }, + "ownable": { + "ownerGroup": "ess", + "accessGroups": ["dmsc"] + } +} diff --git a/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json b/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json new file mode 100644 index 0000000..c2b5817 --- /dev/null +++ b/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json @@ -0,0 +1,252 @@ +{ + "id": "0275d813-be6b-444f-812f-b8311d129361", + "dataset": { + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE raw", + "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "creationLocation": "DMSC", + "principalInvestigator": "Max Novelli", + "owner": "Massimiliano Novelli", + "ownerEmail": "max.novelli@ess.eu", + "contactEmail": "max.novelli@ess.eu", + "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", + "creationTime": "2022-03-07T15:44:59.000Z", + "type": "raw", + "scientificMetadata": { + "sample_width": { "value": 0.015, "unit": "m" }, + "sample_height": { "value": 0.015, "unit": "m" }, + "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, + "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, + "guide_sample_distance": { "value": 0.6, "unit": "m" }, + "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, + "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, + "moderator_width": { "value": 0.12, "unit": "m" }, + "moderator_height": { "value": 0.03, "unit": "m" }, + "moderator_sample_distance": { "value": 170, "unit": "m" }, + "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, + "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, + "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, + "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, + "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, + "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, + "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, + "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, + "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, + "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, + "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, + "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, + "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, + "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, + "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, + "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, + "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, + "optimization_name": { "value": "PGESKSE", "unit": "" }, + "configuration_summary": { "value": "PGESKSE", "unit": "" }, + "best_figure_of_merit": { "value": "0.25293", "unit": "" }, + "brilliance_transfer": { "value": "0.47344", 
"unit": "" }, + "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "number_of_parameters": { "value": 2, "unit": "" }, + "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, + "event_writen_present": { "value": true, "unit": "" }, + "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, + "event_done_present": { "value": true, "unit": "" }, + "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, + "event_analysis_present": { "value": true, "unit": "" }, + "event_analysis_file": { "value": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, + "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, + "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, + "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, + "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, + "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, + "Hsize": { "value": 4, "unit": "cm" }, + "moderator_size_y": { "value": 3, "unit": "m" } + }, + "techniques": [ + { + "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", + "name": "Simulation" + } + ], + "size": 68386784, + "instrumentId": "", + "sampleId": "", + "proposalId": "" + }, + "orig_datablock": { + "size": 68386784, + "dataFileList": [ + { + "path": "launch_all.sh", + "size": 10171, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "suggested_reruns-fails.sh", + "size": 448, + "time": "2014-01-23T19:53:04.000Z" + }, + { + "path": "compile_all_py.sh", + "size": 273, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "clean3.sh", + "size": 354, + "time": "2014-01-25T10:44:54.000Z" + }, + { + "path": "master_record-done_4Hsize_3moderator_size_y.txt", + "size": 579, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "master_record-writen_4Hsize_3moderator_size_y.txt", + "size": 561, + "time": "2014-01-23T19:52:38.000Z" + }, + { + "path": "compile_all.sh", + "size": 259, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "output/brill_ref/brilliance_ref_4Hsize_3moderator_size_y.mat", + "size": 11624010, + "time": "2014-01-24T07:56:45.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_acceptance_ess.png", + "size": 521132, + "time": "2014-01-27T11:38:06.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_acceptance_pure.png", + "size": 518423, + "time": "2014-01-27T11:37:52.000Z" + }, + { + "path": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", + "size": 587, + "time": "2014-01-28T14:03:02.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_overall_pure.png", + "size": 144605, + "time": "2014-01-27T11:37:49.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_posdiv_ess.png", + "size": 336496, + "time": "2014-01-27T11:38:04.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y_all.mat", + "size": 34321077, + "time": 
"2014-01-25T00:35:55.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_overall_ess.png", + "size": 127660, + "time": "2014-01-27T11:38:02.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_geometry.dat", + "size": 2175, + "time": "2014-01-25T00:23:10.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y_ifit_analyse.m", + "size": 19482, + "time": "2014-01-23T19:52:40.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_geometry.png", + "size": 76259, + "time": "2014-01-27T11:38:09.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_posdiv_pure.png", + "size": 353828, + "time": "2014-01-27T11:37:50.000Z" + }, + { + "path": "brilliance_refference/brilliance_ifit_4Hsize_3moderator_size_y.m", + "size": 3048, + "time": "2014-01-23T19:52:33.000Z" + }, + { + "path": "brilliance_refference/brilliance_4Hsize_3moderator_size_y1.mat", + "size": 11626979, + "time": "2014-01-24T07:56:42.000Z" + }, + { + "path": "brilliance_refference/brilliance_4Hsize_3moderator_size_y.batch", + "size": 671, + "time": "2014-01-23T19:52:32.000Z" + }, + { + "path": "brilliance_refference/input_used_4Hsize_3moderator_size_y.txt", + "size": 358, + "time": "2014-01-23T19:52:35.000Z" + }, + { + "path": "brilliance_refference/run_brilliance_ifit_4Hsize_3moderator_size_y.m", + "size": 53, + "time": "2014-01-23T19:52:36.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y.batch", + "size": 734, + "time": "2014-01-23T19:52:48.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y_ifit.m", + "size": 11101, + "time": "2014-01-23T19:52:48.000Z" + }, + { + "path": "PGESKSE/err_PGESKSE_4Hsize_3moderator_size_y.txt", + "size": 0, + "time": "2014-01-24T21:13:29.000Z" + }, + { + "path": "PGESKSE/run_PGESKSE_4Hsize_3moderator_size_y_ifit.m", + "size": 50, + "time": "2014-01-23T19:52:51.000Z" + }, + { + "path": "PGESKSE/out_PGESKSE_4Hsize_3moderator_size_y.txt", + "size": 8681220, + "time": "2014-01-25T00:35:58.000Z" + }, + { + "path": "PGESKSE/compile_PGESKSE_py.sh", + "size": 558, + "time": "2014-01-23T19:52:45.000Z" + }, + { + "path": "PGESKSE/compile_PGESKSE.sh", + "size": 540, + "time": "2014-01-23T19:52:45.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y1.par", + "size": 918, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y1_geometry.dat", + "size": 2175, + "time": "2014-01-25T00:23:10.000Z" + } + ] + }, + "ownable": { + "ownerGroup": "ess", + "accessGroups": ["dmsc"] + } +} diff --git a/examples/data/published_data.json b/examples/data/published_data.json new file mode 100644 index 0000000..54a573f --- /dev/null +++ b/examples/data/published_data.json @@ -0,0 +1,56 @@ +[ + { + "doi": "10.17199/03dd9804-1b04-4d36-b0fb-cf66e9891e7d", + "affiliation": "ESS", + "creator": [ + "Oliver Lohmann" + ], + "publisher": "ESS", + "publicationYear": 2019, + "title": "SANS/Reflectometry", + "url": "", + "abstract": "SANS/Reflectometry", + "dataDescription": "https://github.com/ess-dmsc/ess_file_formats/wiki/NeXus", + "resourceType": "NeXus HDF5", + "numberOfFiles": null, + "sizeOfArchive": null, + "pidArray": [ + "20.500.12269/0a269002-83e2-4f18-bb98-36c01836d66a" + ], + "authors": [ + "Oliver Lohmann" + ], + "registeredTime": "2020-09-01T14:16:15.552Z", + "status": "registered", + "thumbnail": "", + "createdBy": "admin", + "updatedBy": "admin", + "createdAt": "2020-01-03T19:38:34.203Z", + "updatedAt": "2020-09-09T09:37:58.023Z" + }, + { 
+ "doi": "10.17199/165f8a52-c15d-4c96-ad7d-fb0cbe969f66", + "creator": [ + "Peter Kadletz" + ], + "publisher": "ESS", + "publicationYear": 2020, + "title": "Final bte", + "url": "", + "abstract": "Peter Kadletz, Tobias Richter", + "dataDescription": "https://github.com/ess-dmsc/ess_file_formats/wiki/NeXus", + "resourceType": "raw", + "numberOfFiles": null, + "sizeOfArchive": null, + "pidArray": [ + "20.500.12269/2511nicos_00002511.hdf" + ], + "registeredTime": "2020-09-01T14:16:17.272Z", + "status": "registered", + "scicatUser": "ingestor", + "thumbnail": "", + "updatedBy": "admin", + "createdAt": "2022-06-03T11:16:09.681Z", + "updatedAt": "2020-09-09T09:37:58.094Z" + } +] diff --git a/examples/ingestion_simulation_dataset_ess.py b/examples/ingestion_simulation_dataset_ess.py new file mode 100644 index 0000000..96c57c6 --- /dev/null +++ b/examples/ingestion_simulation_dataset_ess.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# coding: utf-8 + +# ingestion_simulation_dataset_ess +# +# Ingest the example simulation dataset in the specified scicat instance +# This script is provided as is, and as an example in pyScicat documentation +# +# +# Create by: Max Novelli +# max.novelli@ess.eu +# European Spallation Source ERIC, +# P.O. Box 176, +# SE-221 00, Lund, Sweden +# +# + + +# libraries +import json +import pyscicat.client as pyScClient +import pyscicat.model as pyScModel + + +# scicat configuration file +# includes scicat instance URL +# scicat user and password +scicat_configuration_file = "./data/ingestion_simulation_dataset_ess_config.json" +simulation_dataset_file = "./data/ingestion_simulation_dataset_ess.json" + + +# loads scicat configuration +with open(scicat_configuration_file, "r") as fh: + scicat_config = json.load(fh) + + +# loads simulation information from matching json file +with open(simulation_dataset_file, "r") as fh: + dataset_information = json.load(fh) + +# instantiate a pySciCat client +scClient = pyScClient.ScicatClient( + base_url=scicat_config["scicat"]["host"], + username=scicat_config["scicat"]["username"], + password=scicat_config["scicat"]["password"], +) + +# create an owneable object to be used with all the other models +# all the fields are retrieved directly from the simulation information +ownable = pyScModel.Ownable(**dataset_information["ownable"]) + + +# create dataset object from the pyscicat model +# includes ownable from previous step +dataset = pyScModel.RawDataset(**dataset_information["dataset"], **ownable.dict()) + + +# create dataset entry in scicat +# it returns the full dataset information, including the dataset pid assigned automatically by scicat +created_dataset = scClient.upload_new_dataset(dataset) + + +# create origdatablock object from pyscicat model +origDataBlock = pyScModel.OrigDatablock( + size=dataset_information["orig_datablock"]["size"], + datasetId=created_dataset["pid"], + dataFileList=[ + pyScModel.DataFile(**file) + for file in dataset_information["orig_datablock"]["dataFileList"] + ], + **ownable.dict() +) + +# create origDatablock associated with dataset in SciCat +# it returns the full object including SciCat id assigned when created +created_orig_datablock = scClient.upload_dataset_origdatablock(origDataBlock) diff --git a/pyscicat/_version.py b/pyscicat/_version.py index 6977658..bae1847 100644 --- a/pyscicat/_version.py +++ b/pyscicat/_version.py @@ -1,11 +1,13 @@ + # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # 
feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.28 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -14,6 +16,8 @@ import re import subprocess import sys +from typing import Callable, Dict +import functools def get_keywords(): @@ -51,40 +55,44 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - + """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f - return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,15 +104,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): @@ -116,25 +122,18 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": 
None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -147,22 +146,21 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @@ -170,10 +168,14 @@ def git_get_keywords(versionfile_abs): @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -186,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -199,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -207,30 +209,28 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -241,7 +241,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -249,24 +257,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() @@ -276,6 +275,39 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. 
If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -284,16 +316,17 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] + git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) return pieces # tag @@ -302,12 +335,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] + pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -318,13 +349,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -355,25 +387,74 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 
+ + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered @@ -404,12 +485,41 @@ def render_pep440_post(pieces): return rendered +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 
0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -469,23 +579,25 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -495,13 +607,9 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} def get_versions(): @@ -515,7 +623,8 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) except NotThisMethod: pass @@ -524,16 +633,13 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split("/"): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -547,10 +653,6 @@ def get_versions(): except NotThisMethod: pass - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/pyscicat/client.py b/pyscicat/client.py index f881768..1a06ac5 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -5,12 +5,22 @@ import hashlib import logging import json -from typing import List -import urllib +import re +from typing import Optional +from urllib.parse import urljoin, quote_plus +from pydantic import BaseModel import requests -from pyscicat.model import Attachment, Datablock, Dataset, RawDataset, DerivedDataset + +from pyscicat.model import ( + Attachment, + Dataset, + Instrument, + OrigDatablock, + Proposal, + Sample, +) logger = logging.getLogger("splash_ingest") can_debug = logger.isEnabledFor(logging.DEBUG) @@ -40,7 +50,7 @@ class ScicatClient: def __init__( self, - base_url: str = None, + base_url: str, token: str = False, username: str = None, password: str = None, @@ -69,225 +79,411 @@ def __init__( self._username = username # default username self._password = password # default password self._token = token # store token here - assert self._base_url is not None, "SciCat database URL must be provided" - - logger.info(f"Starting ingestor talking to scicat at: {self._base_url}") + self._headers = {} # store headers if not self._token: assert (self._username is not None) and ( self._password is not None ), "SciCat login credentials (username, password) must be provided if token is not provided" self._token = get_token(self._base_url, self._username, self._password) + self._headers["Authorization"] = "Bearer {}".format(self._token) - def _send_to_scicat(self, url, dataDict=None, cmd="post"): + def _send_to_scicat(self, cmd: str, endpoint: str, data: BaseModel = None): """sends a command to the SciCat API server using url and token, returns the response JSON Get token with the getToken method""" - if cmd == "post": - response = requests.post( - url, - params={"access_token": self._token}, - json=dataDict, - timeout=self._timeout_seconds, - stream=False, - verify=True, - ) - elif cmd == "delete": - response = requests.delete( - url, - params={"access_token": self._token}, - timeout=self._timeout_seconds, - stream=False, - ) - elif cmd == "get": - response = requests.get( - url, - params={"access_token": self._token}, - json=dataDict, - timeout=self._timeout_seconds, - stream=False, - ) - elif cmd == "patch": - response = requests.patch( - url, - params={"access_token": self._token}, - json=dataDict, - timeout=self._timeout_seconds, - stream=False, - ) - return response - - # Future support for samples - # def upload_sample(self, sample): - # sample = { - # "sampleId": projected_start_doc.get('sample_id'), - # "owner": projected_start_doc.get('pi_name'), - # "description": projected_start_doc.get('sample_name'), - # "createdAt": 
datetime.isoformat(datetime.utcnow()) + "Z", - # "sampleCharacteristics": {}, - # "isPublished": False, - # "ownerGroup": owner_group, - # "accessGroups": access_groups, - # "createdBy": self._username, - # "updatedBy": self._username, - # "updatedAt": datetime.isoformat(datetime.utcnow()) + "Z" - # } - # sample_url = f'{self._base_url}Samples' - - # resp = self._send_to_scicat(sample_url, sample) - # if not resp.ok: # can happen if sample id is a duplicate, but we can't tell that from the response - # err = resp.json()["error"] - # raise ScicatCommError(f"Error creating Sample {err}") - - def upload_dataset(self, dataset: Dataset) -> str: - """Upload a raw or derived dataset (method is autosensing) + return requests.request( + method=cmd, + url=urljoin(self._base_url, endpoint), + json=data.dict(exclude_none=True) if data is not None else None, + params={"access_token": self._token}, + headers=self._headers, + timeout=self._timeout_seconds, + stream=False, + verify=True, + ) + + def _call_endpoint( + self, + cmd: str, + endpoint: str, + data: BaseModel = None, + operation: str = "", + allow_404=False, + ) -> Optional[dict]: + response = self._send_to_scicat(cmd=cmd, endpoint=endpoint, data=data) + result = response.json() + if not response.ok: + err = result.get("error", {}) + if ( + allow_404 + and response.status_code == 404 + and re.match(r"Unknown (.+ )?id", err.get("message", "")) + ): + # The operation failed but because the object does not exist in SciCat. + logger.error("Error in operation %s: %s", operation, err) + return None + raise ScicatCommError(f"Error in operation {operation}: {err}") + logger.info( + "Operation '%s' successful%s", + operation, + f"pid={result['pid']}" if "pid" in result else "", + ) + return result + + def datasets_create(self, dataset: Dataset) -> str: + """ + Upload a new dataset. Uses the generic dataset endpoint. + Relies on the endpoint to sense the dataset type + This function was renamed. + It is still accessible with the original name for backward compatibility + The original name were create_dataset and upload_new_dataset Parameters ---------- dataset : Dataset - Dataset to load + Dataset to create Returns ------- str - pid (or unique identifier) of the newly created dataset + pid of the dataset Raises ------ ScicatCommError Raises if a non-20x message is returned """ - if isinstance(dataset, RawDataset): - dataset_url = self._base_url + "RawDataSets/replaceOrCreate" - elif isinstance(dataset, DerivedDataset): - dataset_url = self._base_url + "DerivedDatasets/replaceOrCreate" - else: - logging.error( - "Dataset type not recognized (not Derived or Raw dataset instances)" - ) - resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error creating dataset {err}") - new_pid = resp.json().get("pid") - logger.info(f"new dataset created {new_pid}") - return new_pid - - def upload_raw_dataset(self, dataset: Dataset) -> str: - """Upload a raw dataset + return self._call_endpoint( + cmd="post", endpoint="Datasets", data=dataset, operation="datasets_create" + ).get("pid") + + """ + Upload a new dataset + Original name, kept for for backward compatibility + """ + upload_new_dataset = datasets_create + create_dataset = datasets_create + + def datasets_update(self, dataset: Dataset, pid: str) -> str: + """Updates an existing dataset + This function was renamed. 
+ It is still accessible with the original name for backward compatibility + The original name was update_dataset. Parameters ---------- dataset : Dataset - Dataset to load + Dataset to update + + pid + pid (or unique identifier) of dataset being updated Returns ------- str - pid (or unique identifier) of the newly created dataset + pid (or unique identifier) of the dataset + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + return self._call_endpoint( + cmd="patch", + endpoint=f"Datasets/{quote_plus(pid)}", + data=dataset, + operation="datasets_update", + ).get("pid") + + """ + Update a dataset + Original name, kept for for backward compatibility + """ + update_dataset = datasets_update + + def datasets_origdatablock_create(self, origdatablock: OrigDatablock) -> dict: + """ + Create a new SciCat Dataset OrigDatablock + This function has been renamed. + It is still accessible with the original name for backward compatibility + The original names were create_dataset_origdatablock and upload_dataset_origdatablock + + Parameters + ---------- + origdatablock : + The OrigDatablock to create + + Returns + ------- + dict + The created OrigDatablock with id Raises ------ ScicatCommError Raises if a non-20x message is returned + """ - raw_dataset_url = self._base_url + "RawDataSets/replaceOrCreate" - resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True)) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error creating raw dataset {err}") - new_pid = resp.json().get("pid") - logger.info(f"new dataset created {new_pid}") - return new_pid + endpoint = f"Datasets/{quote_plus(origdatablock.datasetId)}/origdatablocks" + return self._call_endpoint( + cmd="post", + endpoint=endpoint, + data=origdatablock, + operation="datasets_origdatablock_create", + ) - def upload_derived_dataset(self, dataset: Dataset) -> str: - """Upload a derived dataset + """ + Create a new SciCat Dataset OrigDatablock + Original name, kept for for backward compatibility + """ + upload_dataset_origdatablock = datasets_origdatablock_create + create_dataset_origdatablock = datasets_origdatablock_create + + def datasets_attachment_create( + self, attachment: Attachment, datasetType: str = "Datasets" + ) -> dict: + """ + Create a new Attachment for a dataset. + Note that datasetType can be provided to determine the type of dataset + that this attachment is attached to. This is required for creating the url that SciCat uses. + This function has been renamed. + It is still accessible with the original name for backward compatibility + The original names were create_dataset_attachment and upload_attachment Parameters ---------- - dataset : Dataset - Dataset to upload + attachment : Attachment + Attachment to upload + + datasetType : str + Type of dataset to upload to, default is `Datasets` + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + endpoint = f"{datasetType}/{quote_plus(attachment.datasetId)}/attachments" + return self._call_endpoint( + cmd="post", + endpoint=endpoint, + data=attachment, + operation="datasets_attachment_create", + ) + + """ + Create a new attachement for a dataset + Original name, kept for for backward compatibility + """ + upload_attachment = datasets_attachment_create + create_dataset_attachment = datasets_attachment_create + + def samples_create(self, sample: Sample) -> str: + """ + Create a new sample. + An error is raised when a sample with the same sampleId already exists. 
+ This function is also accessible as upload_sample. + + + Parameters + ---------- + sample : Sample + Sample to upload Returns ------- str - pid (or unique identifier) of the newly created dataset + ID of the newly created sample Raises ------ ScicatCommError Raises if a non-20x message is returned """ - derived_dataset_url = self._base_url + "DerivedDataSets/replaceOrCreate" - resp = self._send_to_scicat( - derived_dataset_url, dataset.dict(exclude_none=True) - ) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error creating raw dataset {err}") - new_pid = resp.json().get("pid") - logger.info(f"new dataset created {new_pid}") - return new_pid + return self._call_endpoint( + cmd="post", + endpoint="Samples", + data=sample, + operation="samples_create", + ).get("sampleId") + + upload_sample = samples_create + + def samples_update(self, sample: Sample, sampleId: str = None) -> str: + """Updates an existing sample + + Parameters + ---------- + sample : Sample + Sample to update + + sampleId + ID of sample being updated. By default, ID is taken from sample parameter. + + Returns + ------- + str + ID of the sample + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + + AssertionError + Raises if no ID is provided + """ + if sampleId is None: + assert sample.sampleId is not None, "sampleId should not be None" + sampleId = sample.sampleId + sample.sampleId = None + return self._call_endpoint( + cmd="patch", + endpoint=f"Samples/{quote_plus(sampleId)}", + data=sample, + operation="samples_update", + ).get("sampleId") + + def instruments_create(self, instrument: Instrument): + """ + Create a new instrument. + Note that in SciCat admin rights are required to upload instruments. + An error is raised when an instrument with the same pid already exists. + This function is also accessible as upload_instrument. - def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): - """Upload a Datablock Parameters ---------- - datablock : Datablock - Datablock to upload + instrument : Instrument + Instrument to upload + + Returns + ------- + str + pid (or unique identifier) of the newly created instrument Raises ------ ScicatCommError Raises if a non-20x message is returned """ + return self._call_endpoint( + cmd="post", + endpoint="Instruments", + data=instrument, + operation="instruments_create", + ).get("pid") - url = ( - self._base_url - + f"{datasetType}/{urllib.parse.quote_plus(datablock.datasetId)}/origdatablocks" - ) - resp = self._send_to_scicat(url, datablock.dict(exclude_none=True)) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error creating datablock. {err}") + upload_instrument = instruments_create - def upload_attachment( - self, attachment: Attachment, datasetType: str = "RawDatasets" - ): - """Upload an Attachment. Note that datasetType can be provided to determine the type of dataset - that this attachment is attached to. This is required for creating the url that SciCat uses. + def instruments_update(self, instrument: Instrument, pid: str = None) -> str: + """Updates an existing instrument. + Note that in SciCat admin rights are required to upload instruments. Parameters ---------- - attachment : Attachment - Attachment to upload + instrument : Instrument + Instrument to update + + pid + pid (or unique identifier) of instrument being updated. + By default, pid is taken from instrument parameter. 
+ + Returns + ------- + str + ID of the instrument - datasetType : str - Type of dataset to upload to, default is `RawDatasets` Raises ------ ScicatCommError Raises if a non-20x message is returned + + AssertionError + Raises if no ID is provided """ - url = ( - self._base_url - + f"{datasetType}/{urllib.parse.quote_plus(attachment.datasetId)}/attachments" - ) - logging.debug(url) - resp = requests.post( - url, - params={"access_token": self._token}, - timeout=self._timeout_seconds, - stream=False, - json=attachment.dict(exclude_none=True), - verify=True, - ) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error uploading thumbnail. {err}") + if pid is None: + assert instrument.pid is not None, "pid should not be None" + pid = instrument.pid + instrument.pid = None + return self._call_endpoint( + cmd="patch", + endpoint=f"Instruments/{quote_plus(pid)}", + data=instrument, + operation="instruments_update", + ).get("pid") + + def proposals_create(self, proposal: Proposal): + """ + Create a new proposal. + Note that in SciCat admin rights are required to upload proposals. + An error is raised when a proposal with the same proposalId already exists. + This function is also accessible as upload_proposal. + - def get_datasets_full_query(self, skip=0, limit=25, query_fields=None): - """Gets datasets using the fullQuery mechanism of SciCat. This is + Parameters + ---------- + proposal : Proposal + Proposal to upload + + Returns + ------- + str + ID of the newly created proposal + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + return self._call_endpoint( + cmd="post", + endpoint="Proposals", + data=proposal, + operation="proposals_create", + ).get("proposalId") + + upload_proposal = proposals_create + + def proposals_update(self, proposal: Proposal, proposalId: str = None) -> str: + """Updates an existing proposal. + Note that in SciCat admin rights are required to upload proposals. + + Parameters + ---------- + proposal : Proposal + Proposal to update + + proposalId + ID of proposal being updated. By default, this is taken from proposal parameter. + + Returns + ------- + str + ID of the proposal + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + + AssertionError + Raises if no ID is provided + """ + if proposalId is None: + assert proposal.proposalId is not None, "proposalId should not be None" + proposalId = proposal.proposalId + proposal.proposalId = None + return self._call_endpoint( + cmd="patch", + endpoint=f"Proposals/{quote_plus(proposalId)}", + data=proposal, + operation="proposals_update", + ).get("proposalId") + + def datasets_find( + self, skip: int = 0, limit: int = 25, query_fields: Optional[dict] = None + ) -> Optional[dict]: + """ + Gets datasets using the fullQuery mechanism of SciCat. This is appropriate for cases where might want paging and cases where you want to perform a text search on the Datasets collection. The full features of fullQuery search are beyond this document. @@ -297,6 +493,10 @@ def get_datasets_full_query(self, skip=0, limit=25, query_fields=None): To query based on the full text search, send `{"text": " List[Dataset]: - """Gets datasets using the simple fiter mechanism. 
This + """ + find a set of datasets according the full query provided + Original name, kept for for backward compatibility + """ + get_datasets_full_query = datasets_find + find_datasets_full_query = datasets_find + + def datasets_get_many(self, filter_fields: Optional[dict] = None) -> Optional[dict]: + """ + Gets datasets using the simple fiter mechanism. This is appropriate when you do not require paging or text search, but want to be able to limit results based on items in the Dataset object. + This function has been renamed and the old name has been mantained for backward compatibility + The previous names are find_datasets and get_datasets For example, a search for Datasets of a given proposalId would have ```python @@ -335,6 +544,10 @@ def get_datasets(self, filter_fields=None) -> List[Dataset]: ```python filterField = {"proposalId": ""} ``` + If you want to search on partial strings, you can use "like": + ```python + filterField = {"proposalId": {"like":"123"}} + ``` Parameters ---------- @@ -343,26 +556,211 @@ def get_datasets(self, filter_fields=None) -> List[Dataset]: """ if not filter_fields: filter_fields = {} - filter_fields = json.dumps(filter_fields) - url = f'{self._base_url}/Datasets/?filter={{"where":{filter_fields}}}' - response = self._send_to_scicat(url, cmd="get") - if not response.ok: - err = response.json()["error"] - logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') - return None - return response.json() - - # this method is future, needs testing. - # def update_dataset(self, pid, fields: Dict): - # response = self._send_to_scicat( - # f"{self._base_url}/Datasets", dataDict=fields, cmd="patch" - # ) - # if not response.ok: - # err = response.json()["error"] - # logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') - # return None - # return response.json() + endpoint = f'Datasets?filter={{"where":{filter_fields}}}' + return self._call_endpoint( + cmd="get", endpoint=endpoint, operation="datasets_get_many", allow_404=True + ) + + """ + find a set of datasets according to the simple filter provided + Original name, kept for for backward compatibility + """ + get_datasets = datasets_get_many + find_datasets = datasets_get_many + + def published_data_get_many(self, filter=None) -> Optional[dict]: + """ + retrieve all the published data using the simple fiter mechanism. This + is appropriate when you do not require paging or text search, but + want to be able to limit results based on items in the Dataset object. + This function has been renamed and the old name has been maintained for backward compatibility + The previous name are find_published_data and get_published_data + + For example, a search for published data of a given doi would have + ```python + filter = {"doi": "1234"} + ``` + + Parameters + ---------- + filter : dict + Dictionary of filtering fields. Must be json serializable. + """ + if filter: + filter = json.dumps(filter) + + endpoint = "PublishedData" + (f'?filter={{"where":{filter}}}' if filter else "") + return self._call_endpoint( + cmd="get", + endpoint=endpoint, + operation="published_data_get_many", + allow_404=True, + ) + + """ + find a set of published data according to the simple filter provided + Original name, kept for for backward compatibility + """ + get_published_data = published_data_get_many + find_published_data = published_data_get_many + + def datasets_get_one(self, pid: str) -> Optional[dict]: + """ + Gets dataset with the pid provided. + This function has been renamed. 
Provious name has been maintained for backward compatibility. + Previous names was get_dataset_by_pid + + Parameters + ---------- + pid : string + pid of the dataset requested. + """ + return self._call_endpoint( + cmd="get", + endpoint=f"Datasets/{quote_plus(pid)}", + operation="datasets_get_one", + allow_404=True, + ) + + get_dataset_by_pid = datasets_get_one + + def instruments_get_one(self, pid: str = None, name: str = None) -> Optional[dict]: + """ + Get an instrument by pid or by name. + If pid is provided it takes priority over name. + + This function has been renamed. Previous name has been maintained for backward compatibility. + Previous name was get_instrument + + Parameters + ---------- + pid : str + Pid of the instrument + + name : str + The name of the instrument + + Returns + ------- + dict + The instrument with the requested name + """ + + if pid: + endpoint = f"Instruments/{quote_plus(pid)}" + elif name: + query = json.dumps({"where": {"name": {"like": name}}}) + endpoint = f"Instruments/findOne?{query}" + else: + raise ValueError("You must specify instrument pid or name") + + return self._call_endpoint( + cmd="get", + endpoint=endpoint, + operation="instruments_get_one", + allow_404=True, + ) + + get_instrument = instruments_get_one + + def samples_get_one(self, pid: str) -> Optional[dict]: + """ + Get a sample by pid. + This function has been renamed. Previous name has been maintained for backward compatibility. + Previous name was get_sample + + + Parameters + ---------- + pid : str + The pid of the sample + + Returns + ------- + dict + The sample with the requested pid + """ + return self._call_endpoint( + cmd="get", + endpoint=f"Samples/{quote_plus(pid)}", + operation="samples_get_one", + allow_404=True, + ) + + get_sample = samples_get_one + + def proposals_get_one(self, pid: str = None) -> Optional[dict]: + """ + Get proposal by pid. + This function has been renamed. Previous name has been maintained for backward compatibility. + Previous name was get_proposal + + Parameters + ---------- + pid : str + The pid of the proposal + + Returns + ------- + dict + The proposal with the requested pid + """ + return self._call_endpoint( + cmd="get", endpoint=f"Proposals/{quote_plus(pid)}", allow_404=True + ) + + get_proposal = proposals_get_one + + def datasets_origdatablocks_get_one(self, pid: str) -> Optional[dict]: + """ + Get dataset orig datablocks by dataset pid. + This function has been renamed. Previous name has been maintained for backward compatibility. + Previous name was get_dataset_origdatablocks + + Parameters + ---------- + pid : str + The pid of the dataset + + Returns + ------- + dict + The orig_datablocks of the dataset with the requested pid + """ + return self._call_endpoint( + cmd="get", + endpoint=f"Datasets/{quote_plus(pid)}/origdatablocks", + operation="datasets_origdatablocks_get_one", + allow_404=True, + ) + + get_dataset_origdatablocks = datasets_origdatablocks_get_one + + def datasets_delete(self, pid: str) -> Optional[dict]: + """ + Delete dataset by pid + This function has been renamed. Previous name has been maintained for backward compatibility. 
+ Previous name was delete_dataset + + Parameters + ---------- + pid : str + The pid of the dataset to be deleted + + Returns + ------- + dict + response from SciCat backend + """ + return self._call_endpoint( + cmd="delete", + endpoint=f"Datasets/{quote_plus(pid)}", + operation="datasets_delete", + allow_404=True, + ) + + delete_dataset = datasets_delete def get_file_size(pathobj): @@ -401,26 +799,55 @@ def from_credentials(base_url: str, username: str, password: str): return from_token(base_url, token) -def get_token(base_url, username, password): - """logs in using the provided username / password combination - and receives token for further communication use""" - logger.info(f" Getting new token for user {username}") - if base_url[-1] != "/": - base_url = base_url + "/" +def _log_in_via_users_login(base_url, username, password): response = requests.post( - base_url + "Users/login", + urljoin(base_url, "Users/login"), + json={"username": username, "password": password}, + stream=False, + verify=True, + ) + if not response.ok: + logger.info(f" Failed to log in via endpoint Users/login: {response.json()}") + return response + + +def _log_in_via_auth_msad(base_url, username, password): + import re + + # Strip the api/vn suffix + base_url = re.sub(r"/api/v\d+/?", "", base_url) + response = requests.post( + urljoin(base_url, "auth/msad"), json={"username": username, "password": password}, stream=False, verify=True, ) if not response.ok: - logger.error(f" ** Error received: {response}") err = response.json()["error"] - logger.error(f' {err["name"]}, {err["statusCode"]}: {err["message"]}') + logger.error( + f'Error retrieving token for user: {err["name"]}, {err["statusCode"]}: {err["message"]}' + ) raise ScicatLoginError(response.content) - data = response.json() - # print("Response:", data) - token = data["id"] # not sure if semantically correct - logger.info(f" token: {token}") - return token + +def get_token(base_url, username, password): + """logs in using the provided username / password combination + and receives token for further communication use""" + # Users/login only works for functional accounts and auth/msad for regular users. + # Try both and see what works. This is not nice but seems to be the only + # feasible solution right now. 
+ logger.info(" Getting new token") + + response = _log_in_via_users_login(base_url, username, password) + if response.ok: + return response.json()["id"] # not sure if semantically correct + + response = _log_in_via_auth_msad(base_url, username, password) + if response.ok: + return response.json()["access_token"] + + err = response.json()["error"] + logger.error( + f' Failed log in: {err["name"]}, {err["statusCode"]}: {err["message"]}' + ) + raise ScicatLoginError(response.content) diff --git a/pyscicat/model.py b/pyscicat/model.py index f9f26fc..f7251be 100644 --- a/pyscicat/model.py +++ b/pyscicat/model.py @@ -13,20 +13,21 @@ class DatasetType(str, enum.Enum): derived = "derived" -class Ownable(BaseModel): - """Many objects in SciCat are ownable""" +class MongoQueryable(BaseModel): + """Many objects in SciCat are mongo queryable""" - ownerGroup: str - accessGroups: List[str] + createdBy: Optional[str] = None + updatedBy: Optional[str] = None + updatedAt: Optional[str] = None + createdAt: Optional[str] = None -class MongoQueryable(BaseModel): - """Many objects in SciCat are mongo queryable""" +class Ownable(MongoQueryable): + """Many objects in SciCat are ownable""" - createdBy: Optional[str] - updatedBy: Optional[str] - updatedAt: Optional[str] - createdAt: Optional[str] + ownerGroup: Optional[str] = None + accessGroups: Optional[List[str]] = None + instrumentGroup: Optional[str] = None class User(BaseModel): @@ -40,39 +41,37 @@ class User(BaseModel): id: str -class Proposal(Ownable, MongoQueryable): +class Proposal(Ownable): """ Defines the purpose of an experiment and links an experiment to principal investigator and main proposer """ - # TODO: find out which of these are not optional and update - proposalId: Optional[str] - pi_email: Optional[str] - pi_firstname: Optional[str] - pi_lastname: Optional[str] - email: Optional[str] - firstname: Optional[str] - lastname: Optional[str] - title: Optional[str] - abstract: Optional[str] - startTime: Optional[str] - endTime: Optional[str] + proposalId: str + pi_email: Optional[str] = None + pi_firstname: Optional[str] = None + pi_lastname: Optional[str] = None + email: str + firstname: Optional[str] = None + lastname: Optional[str] = None + title: Optional[str] = None # required in next backend version + abstract: Optional[str] = None + startTime: Optional[str] = None + endTime: Optional[str] = None MeasurementPeriodList: Optional[ List[dict] - ] # may need updating with the measurement period model + ] = None # may need updating with the measurement period model -class Sample(Ownable, MongoQueryable): +class Sample(Ownable): """ Models describing the characteristics of the samples to be investigated. Raw datasets should be linked to such sample definitions. """ - # TODO: find out which of these are not optional and update - sampleId: Optional[str] - owner: Optional[str] - description: Optional[str] - sampleCharacteristics: Optional[dict] + sampleId: Optional[str] = None + owner: Optional[str] = None + description: Optional[str] = None + sampleCharacteristics: Optional[dict] = None isPublished: bool = False @@ -84,15 +83,15 @@ class Job(MongoQueryable): track of analysis jobs e.g. for automated analysis workflows """ - id: Optional[str] + id: Optional[str] = None emailJobInitiator: str type: str - creationTime: Optional[str] # not sure yet which ones are optional or not. - executionTime: Optional[str] - jobParams: Optional[dict] - jobStatusMessage: Optional[str] - datasetList: Optional[dict] # documentation says dict, but should maybe be list? 
- jobResultObject: Optional[dict] # ibid. + creationTime: Optional[str] = None # not sure yet which ones are optional or not. + executionTime: Optional[str] = None + jobParams: Optional[dict] = None + jobStatusMessage: Optional[str] = None + datasetList: Optional[dict] = None # documentation says dict, but should maybe be list? + jobResultObject: Optional[dict] = None # ibid. class Instrument(MongoQueryable): @@ -100,41 +99,42 @@ class Instrument(MongoQueryable): Instrument class, most of this is flexibly definable in customMetadata """ - pid: Optional[str] + pid: Optional[str] = None name: str - customMetadata: Optional[dict] + customMetadata: Optional[dict] = None -class Dataset(Ownable, MongoQueryable): +class Dataset(Ownable): """ A dataset in SciCat, base class for derived and raw datasets """ - pid: Optional[str] - classification: Optional[str] + pid: Optional[str] = None + classification: Optional[str] = None contactEmail: str creationTime: str # datetime - datasetName: Optional[str] - description: Optional[str] - history: Optional[List[dict]] # list of foreigh key ids to the Messages table - instrumentId: Optional[str] + datasetName: Optional[str] = None + description: Optional[str] = None + history: Optional[List[dict]] = None # list of foreigh key ids to the Messages table + instrumentId: Optional[str] = None isPublished: Optional[bool] = False - keywords: Optional[List[str]] - license: Optional[str] - numberOfFiles: Optional[int] - numberOfFilesArchived: Optional[int] - orcidOfOwner: Optional[str] - packedSize: Optional[int] + keywords: Optional[List[str]] = None + license: Optional[str] = None + numberOfFiles: Optional[int] = None + numberOfFilesArchived: Optional[int] = None + orcidOfOwner: Optional[str] = None + packedSize: Optional[int] = None owner: str - ownerEmail: Optional[str] - sharedWith: Optional[List[str]] - size: Optional[int] + ownerEmail: Optional[str] = None + sharedWith: Optional[List[str]] = None + size: Optional[int] = None sourceFolder: str - sourceFolderHost: Optional[str] - techniques: Optional[List[dict]] # with {'pid':pid, 'name': name} as entries + sourceFolderHost: Optional[str] = None + techniques: Optional[List[dict]] = None # with {'pid':pid, 'name': name} as entries type: DatasetType - validationStatus: Optional[str] - version: Optional[str] + validationStatus: Optional[str] = None + version: Optional[str] = None + scientificMetadata: Optional[Dict] = None class RawDataset(Dataset): @@ -142,17 +142,13 @@ class RawDataset(Dataset): Raw datasets from which derived datasets are... derived. """ - principalInvestigator: Optional[str] - creationLocation: Optional[str] - dataFormat: str - type: DatasetType = "raw" - createdAt: Optional[str] # datetime - updatedAt: Optional[str] # datetime - dataFormat: Optional[str] - endTime: Optional[str] # datetime - sampleId: Optional[str] - proposalId: Optional[str] - scientificMetadata: Optional[Dict] + principalInvestigator: Optional[str] = None + creationLocation: Optional[str] = None + type: DatasetType = DatasetType.raw + dataFormat: Optional[str] = None + endTime: Optional[str] = None # datetime + sampleId: Optional[str] = None + proposalId: Optional[str] = None class DerivedDataset(Dataset): @@ -160,12 +156,12 @@ class DerivedDataset(Dataset): Derived datasets which have been generated based on one or more raw datasets """ - investigator: Optional[str] + investigator: str inputDatasets: List[str] - usedSoftware: List[str] # not optional! 
- jobParameters: Optional[dict] - jobLogData: Optional[str] - scientificMetadata: Optional[Dict] + usedSoftware: List[str] + jobParameters: Optional[dict] = None + jobLogData: Optional[str] = None + type: DatasetType = DatasetType.derived class DataFile(MongoQueryable): @@ -177,7 +173,8 @@ class DataFile(MongoQueryable): path: str size: int - time: Optional[str] + time: Optional[str] = None + chk: Optional[str] = None uid: Optional[str] = None gid: Optional[str] = None perm: Optional[str] = None @@ -188,12 +185,26 @@ class Datablock(Ownable): A Datablock maps between a Dataset and contains DataFiles """ - id: Optional[str] + id: Optional[str] = None # archiveId: str = None listed in catamel model, but comes back invalid? size: int - packedSize: Optional[int] - chkAlg: Optional[int] + packedSize: Optional[int] = None + chkAlg: Optional[int] = None version: str = None + instrumentGroup: Optional[str] = None + dataFileList: List[DataFile] + datasetId: str + + +class OrigDatablock(Ownable): + """ + An Original Datablock maps between a Dataset and contains DataFiles + """ + + id: Optional[str] = None + chkAlg :Optional[str] = None + size: int + instrumentGroup: Optional[str] = None dataFileList: List[DataFile] datasetId: str @@ -203,7 +214,35 @@ class Attachment(Ownable): Attachments can be any base64 encoded string...thumbnails are attachments """ - id: Optional[str] + id: Optional[str] = None thumbnail: str - caption: Optional[str] + caption: Optional[str] = None datasetId: str + + +class PublishedData: + """ + Published Data with registered DOI + """ + + doi: str + affiliation: str + creator: List[str] + publisher: str + publicationYear: int + title: str + url: Optional[str] = None + abstract: str + dataDescription: str + resourceType: str + numberOfFiles: Optional[int] = None + sizeOfArchive: Optional[int] = None + pidArray: List[str] + authors: List[str] + registeredTime: str + status: str + thumbnail: Optional[str] = None + createdBy: str + updatedBy: str + createdAt: str + updatedAt: str diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py deleted file mode 100644 index 5c470e6..0000000 --- a/pyscicat/tests/test_client.py +++ /dev/null @@ -1,107 +0,0 @@ -from datetime import datetime -from pathlib import Path - -import requests_mock -from ..client import ( - from_credentials, - from_token, - encode_thumbnail, - get_file_mod_time, - get_file_size, -) - -from ..model import ( - Attachment, - Datablock, - DataFile, - Dataset, - Ownable, -) - -local_url = "http://localhost:3000/api/v3/" - - -def add_mock_requests(mock_request): - mock_request.post( - local_url + "Users/login", - json={"id": "a_token"}, - ) - mock_request.post(local_url + "Samples", json={"sampleId": "dataset_id"}) - mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"}) - mock_request.post( - local_url + "RawDatasets/42/origdatablocks", - json={"response": "random"}, - ) - mock_request.post( - local_url + "RawDatasets/42/attachments", - json={"response": "random"}, - ) - - -def test_scicate_ingest(): - with requests_mock.Mocker() as mock_request: - add_mock_requests(mock_request) - scicat = from_credentials( - base_url=local_url, - username="Zaphod", - password="heartofgold", - ) - assert ( - scicat._token == "a_token" - ), "scicat client set the token given by the server" - - ownable = Ownable(ownerGroup="magrathea", accessGroups=["deep_though"]) - thumb_path = Path(__file__).parent / "data/SciCatLogo.png" - - time = get_file_mod_time(thumb_path) - assert time is not None - 
size = get_file_size(thumb_path) - assert size is not None - - # RawDataset - dataset = Dataset( - path="/foo/bar", - size=42, - owner="slartibartfast", - contactEmail="slartibartfast@magrathea.org", - creationLocation="magrathea", - creationTime=str(datetime.now()), - type="raw", - instrumentId="earth", - proposalId="deepthought", - dataFormat="planet", - principalInvestigator="A. Mouse", - sourceFolder="/foo/bar", - scientificMetadata={"a": "field"}, - sampleId="gargleblaster", - **ownable.dict() - ) - dataset_id = scicat.upload_raw_dataset(dataset) - - # Datablock with DataFiles - data_file = DataFile(path="/foo/bar", size=42) - data_block = Datablock( - size=42, - version=1, - datasetId=dataset_id, - dataFileList=[data_file], - **ownable.dict() - ) - scicat.upload_datablock(data_block) - - # Attachment - attachment = Attachment( - datasetId=dataset_id, - thumbnail=encode_thumbnail(thumb_path), - caption="scattering image", - **ownable.dict() - ) - scicat.upload_attachment(attachment) - - -def test_initializers(): - with requests_mock.Mocker() as mock_request: - add_mock_requests(mock_request) - - client = from_token(local_url, "let me in!") - assert client._token == "let me in!" diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 2759b55..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,22 +0,0 @@ -# These are required for developing the package (running the tests, building -# the documentation) but not necessarily required for _using_ it. -codecov -coverage -flake8 -pytest -sphinx -twine -pre-commit -black -nbstripout -requests_mock -# These are dependencies of various sphinx extensions for documentation. -ipython -matplotlib -mistune <2.0.0 # temporary while sphinx sorts this out -myst-parser -numpydoc -sphinx-click -sphinx-copybutton -sphinxcontrib.openapi -sphinx_rtd_theme diff --git a/requirements-hdf5.txt b/requirements-hdf5.txt deleted file mode 100644 index c3b2f48..0000000 --- a/requirements-hdf5.txt +++ /dev/null @@ -1,2 +0,0 @@ -hdf5plugin -h5py \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 76aa8db..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pydantic -requests \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index c80108f..e87f52d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,3 +4,59 @@ style = pep440-post versionfile_source = pyscicat/_version.py versionfile_build = pyscicat/_version.py tag_prefix = v + +[metadata] + +name = pyscicat +description = a python API to communicate with the Scicat API +long_description = file: README.md +long_description_content_type = text/markdown +author = Dylan McReynolds +author_email = dmcreynolds@lbl.gov +url = https://github.com/scicatproject/pyscicat +license_files = LICENSE +license=BSD (3-clause) +classifiers= + Development Status :: 2 - Pre-Alpha + Natural Language :: English + Programming Language :: Python :: 3.7 + + +[options] +include_package_data = True +packages= find: +install_requires = + pydantic + requests + +python_requires = >=3.7 + + + +[options.extras_require] +hdf5 = + hdf5plugin + h5py +dev = + codecov + coverage + flake8 + pytest + sphinx + twine + black + requests_mock +docs = + ipython + matplotlib + mistune <2.0.0 # temporary while sphinx sorts this out + myst-parser + numpydoc + sphinx-click + sphinx-copybutton + sphinxcontrib.openapi + sphinx_rtd_theme + +[options.packages.find] +exclude = + continuous_integration diff --git a/setup.py b/setup.py index 9ecf6de..1d0c607 100644 --- 
a/setup.py +++ b/setup.py @@ -1,68 +1,7 @@ -from pathlib import Path -from setuptools import setup, find_packages -import sys +from setuptools import setup import versioneer -min_version = (3, 7) -if sys.version_info < min_version: - error = """ -pyscicat does not support Python {0}.{1}. -Python {2}.{3} and above is required. Check your Python version like so: - -python3 --version - -This may be due to an out-of-date pip. Make sure you have pip >= 9.0.1. -Upgrade pip like so: - -pip install --upgrade pip -""".format( - *(sys.version_info[:2] + min_version) - ) - sys.exit(error) - -here = Path(__file__).absolute() - -with open(here.with_name("README.md"), encoding="utf-8") as readme_file: - readme = readme_file.read() - - -def read_requirements_from_here(here: Path, filename: str = None) -> list: - assert filename is not None, "filename as string must be provided" - assert here.with_name( - filename - ).exists(), f"requirements filename {filename.as_posix()} does not exist" - with open(here.with_name(filename)) as requirements_file: - # Parse requirements.txt, ignoring any commented-out lines. - requirements = [ - line - for line in requirements_file.read().splitlines() - if not line.startswith("#") - ] - return requirements - - -extras_require = {} -extras_require["base"] = read_requirements_from_here(here, "requirements.txt") -extras_require["h5tools"] = read_requirements_from_here(here, "requirements-hdf5.txt") - setup( - name="pyscicat", version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - description="Code for communicating to a SciCat backend server python", - long_description=readme, - author="Dylan McReynolds", - author_email="dmcreynolds@lbl.gov", - url="https://github.com/scicatproject/pyscicat", - python_requires=">={}".format(".".join(str(n) for n in min_version)), - packages=find_packages(exclude=["docs", "tests"]), - include_package_data=True, - extras_require=extras_require, - install_requires=extras_require["base"], - license="BSD (3-clause)", - classifiers=[ - "Development Status :: 2 - Pre-Alpha", - "Natural Language :: English", - "Programming Language :: Python :: 3", - ], + cmdclass=versioneer.get_cmdclass() ) diff --git a/pyscicat/tests/__init__.py b/tests/test_hdf5/__init__.py similarity index 100% rename from pyscicat/tests/__init__.py rename to tests/test_hdf5/__init__.py diff --git a/pyscicat/hdf5/_tests/testdata/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs b/tests/test_hdf5/data/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs similarity index 100% rename from pyscicat/hdf5/_tests/testdata/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs rename to tests/test_hdf5/data/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs diff --git a/pyscicat/hdf5/_tests/test_hdf5sct.py b/tests/test_hdf5/test_hdf5sct.py similarity index 83% rename from pyscicat/hdf5/_tests/test_hdf5sct.py rename to tests/test_hdf5/test_hdf5sct.py index 466e190..3f3af64 100644 --- a/pyscicat/hdf5/_tests/test_hdf5sct.py +++ b/tests/test_hdf5/test_hdf5sct.py @@ -9,19 +9,19 @@ def test_readValue(): # more intelligent path finding: - p = sorted(Path(".").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] + p = sorted(Path("").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] v = h5Get(p, "/sasentry1/sasdata1/I") assert v != "none", "Did not extract value" def test_readAttribute(): - p = sorted(Path(".").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] + p = sorted(Path("").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] v = h5Get(p, "/sasentry1/sasdata1@timestamp") assert v != 
"none", "Did not extract attribute" def test_readMixedDict(): - p = sorted(Path(".").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] + p = sorted(Path("").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] v = h5GetDict( p, { @@ -38,7 +38,7 @@ def test_readMixedDict(): def test_readMetadata_withroot(): - p = sorted(Path(".").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] + p = sorted(Path("").glob("**/cylinderHex_r5_s12_T50_large_ranW_0p5.nxs"))[0] assert p.exists(), f"HDF5/NeXus test file: {p.as_posix()} cannot be found" resultDict = scientific_metadata(p, excludeRootEntry=True, skipKeyList=["sasdata1"]) assert resultDict is not None, "scientific_metadata has not returned anything" diff --git a/pyscicat/tests/conftest.py b/tests/test_pyscicat/__init__.py similarity index 100% rename from pyscicat/tests/conftest.py rename to tests/test_pyscicat/__init__.py diff --git a/tests/test_pyscicat/conftest.py b/tests/test_pyscicat/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/pyscicat/tests/data/SciCatLogo.png b/tests/test_pyscicat/data/SciCatLogo.png similarity index 100% rename from pyscicat/tests/data/SciCatLogo.png rename to tests/test_pyscicat/data/SciCatLogo.png diff --git a/tests/test_pyscicat/test_client.py b/tests/test_pyscicat/test_client.py new file mode 100644 index 0000000..46584f1 --- /dev/null +++ b/tests/test_pyscicat/test_client.py @@ -0,0 +1,230 @@ +from datetime import datetime +from pathlib import Path + +import pytest +import requests_mock +from pyscicat.client import ( + from_credentials, + from_token, + encode_thumbnail, + get_file_mod_time, + get_file_size, + ScicatCommError, +) + +from pyscicat.model import ( + Attachment, + Datablock, + DataFile, + Instrument, + Proposal, + RawDataset, + Sample, + Ownable, +) + +local_url = "http://localhost:3000/api/v3/" + + +def add_mock_requests(mock_request): + mock_request.post( + local_url + "Users/login", + json={"id": "a_token"}, + ) + + mock_request.post(local_url + "Instruments", json={"pid": "earth"}) + mock_request.post(local_url + "Proposals", json={"proposalId": "deepthought"}) + mock_request.post(local_url + "Samples", json={"sampleId": "gargleblaster"}) + mock_request.patch(local_url + "Instruments/earth", json={"pid": "earth"}) + mock_request.patch( + local_url + "Proposals/deepthought", json={"proposalId": "deepthought"} + ) + mock_request.patch( + local_url + "Samples/gargleblaster", json={"sampleId": "gargleblaster"} + ) + + mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"}) + mock_request.patch( + local_url + "Datasets/42", + json={"pid": "42"}, + ) + mock_request.post( + local_url + "Datasets/42/origdatablocks", + json={"response": "random"}, + ) + mock_request.post( + local_url + "Datasets/42/attachments", + json={"response": "random"}, + ) + + mock_request.post(local_url + "Datasets", json={"pid": "42"}) + + +def test_scicat_ingest(): + with requests_mock.Mocker() as mock_request: + add_mock_requests(mock_request) + scicat = from_credentials( + base_url=local_url, + username="Zaphod", + password="heartofgold", + ) + assert ( + scicat._token == "a_token" + ), "scicat client set the token given by the server" + + ownable = Ownable(ownerGroup="magrathea", accessGroups=["deep_though"]) + thumb_path = Path(__file__).parent / "data/SciCatLogo.png" + + time = get_file_mod_time(thumb_path) + assert time is not None + size = get_file_size(thumb_path) + assert size is not None + + # Instrument + instrument = Instrument( + pid="earth", name="Earth", 
customMetadata={"a": "field"} + ) + assert scicat.upload_instrument(instrument) == "earth" + assert scicat.instruments_create(instrument) == "earth" + assert scicat.instruments_update(instrument) == "earth" + + # Proposal + proposal = Proposal( + proposalId="deepthought", + title="Deepthought", + email="deepthought@viltvodle.com", + **ownable.dict() + ) + assert scicat.upload_proposal(proposal) == "deepthought" + assert scicat.proposals_create(proposal) == "deepthought" + assert scicat.proposals_update(proposal) == "deepthought" + + # Sample + sample = Sample( + sampleId="gargleblaster", + description="Gargleblaster", + sampleCharacteristics={"a": "field"}, + **ownable.dict() + ) + assert scicat.upload_sample(sample) == "gargleblaster" + assert scicat.samples_create(sample) == "gargleblaster" + assert scicat.samples_update(sample) == "gargleblaster" + + # RawDataset + dataset = RawDataset( + path="/foo/bar", + size=42, + owner="slartibartfast", + contactEmail="slartibartfast@magrathea.org", + creationLocation="magrathea", + creationTime=str(datetime.now()), + type="raw", + instrumentId="earth", + proposalId="deepthought", + dataFormat="planet", + principalInvestigator="A. Mouse", + sourceFolder="/foo/bar", + scientificMetadata={"a": "field"}, + sampleId="gargleblaster", + **ownable.dict() + ) + dataset_id = scicat.upload_new_dataset(dataset) + assert dataset_id == "42" + + # Update record + dataset.principalInvestigator = "B. Turtle" + dataset_id_2 = scicat.update_dataset(dataset, dataset_id) + assert dataset_id_2 == dataset_id + + # Datablock with DataFiles + data_file = DataFile(path="/foo/bar", size=42) + data_block = Datablock( + size=42, + version="1", + datasetId=dataset_id, + dataFileList=[data_file], + **ownable.dict() + ) + scicat.upload_dataset_origdatablock(data_block) + + # Attachment + attachment = Attachment( + datasetId=dataset_id, + thumbnail=encode_thumbnail(thumb_path), + caption="scattering image", + **ownable.dict() + ) + scicat.upload_attachment(attachment) + + +def test_get_dataset(): + with requests_mock.Mocker() as mock_request: + dataset = RawDataset( + size=42, + owner="slartibartfast", + contactEmail="slartibartfast@magrathea.org", + creationLocation="magrathea", + creationTime=str(datetime.now()), + instrumentId="earth", + proposalId="deepthought", + dataFormat="planet", + principalInvestigator="A. 
Mouse", + sourceFolder="/foo/bar", + scientificMetadata={"a": "field"}, + sampleId="gargleblaster", + ownerGroup="magrathea", + accessGroups=["deep_though"], + ) + mock_request.get( + local_url + "Datasets/123", json=dataset.dict(exclude_none=True) + ) + + client = from_token(base_url=local_url, token="a_token") + retrieved = client.datasets_get_one("123") + assert retrieved == dataset.dict(exclude_none=True) + + +def test_get_nonexistent_dataset(): + with requests_mock.Mocker() as mock_request: + mock_request.get( + local_url + "Datasets/74", + status_code=404, + reason="Not Found", + json={ + "error": { + "statusCode": 404, + "name": "Error", + "message": 'Unknown "Dataset" id "74".', + "code": "MODEL_NOT_FOUND", + } + }, + ) + client = from_token(base_url=local_url, token="a_token") + assert client.datasets_get_one("74") is None + + +def test_get_dataset_bad_url(): + with requests_mock.Mocker() as mock_request: + mock_request.get( + "http://localhost:3000/api/v100/Datasets/53", + status_code=404, + reason="Not Found", + json={ + "error": { + "statusCode": 404, + "name": "Error", + "message": "Cannot GET /api/v100/Datasets/53", + } + }, + ) + client = from_token(base_url="http://localhost:3000/api/v100", token="a_token") + with pytest.raises(ScicatCommError): + client.datasets_get_one("53") + + +def test_initializers(): + with requests_mock.Mocker() as mock_request: + add_mock_requests(mock_request) + + client = from_token(local_url, "let me in!") + assert client._token == "let me in!" diff --git a/tests/tests_integration/__init__.py b/tests/tests_integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/tests_integration/tests_integration.py b/tests/tests_integration/tests_integration.py new file mode 100644 index 0000000..86ad83c --- /dev/null +++ b/tests/tests_integration/tests_integration.py @@ -0,0 +1,94 @@ +from pyscicat.client import ScicatClient +from pyscicat.model import RawDataset, Ownable +from datetime import datetime +import os + + +""" +These test_pyscicat do not use mocks and are designed to connect + to a v4 service for Scicat backend. You can run this easily +in docker-compose following the repo +https://github.com/SciCatProject/scicatlive. +You will also need to use one of the default user accounts or add +your own. + +You will need to set environmental variables for +BASE_URL - the url of your scicat service e.g. http://localhost:3000/api/v3 +SCICAT_USER - the name of your scicat user. +SCICAT_PASSWORD - the password for your scicat user. +""" + +sci_clie = ScicatClient(base_url=os.environ["BASE_URL"], + token=None, + username=os.environ["SCICAT_USER"], + password=os.environ["SCICAT_PASSWORD"]) + + +def test_client(): + assert type(sci_clie) == ScicatClient # noqa: E721 + + +def test_upload_dataset(): + ownable = Ownable(ownerGroup="ingestor", accessGroups=[]) + payload = RawDataset( + datasetName="a new guide book", + path="/foo/bar", + size=42, + packedSize=0, + owner=os.environ["SCICAT_USER"], + contactEmail="slartibartfast@magrathea.org", + creationLocation="Magrathea", + creationTime=datetime.isoformat(datetime.now()), + instrumentId="earth", + proposalId="deepthought", + dataFormat="planet", + principalInvestigator="A. 
Mouse", + sourceFolder="/foo/bar", + scientificMetadata={"type": "string", "value": {"a": "field"}}, + sampleId="gargleblaster", + type="raw", + ownerEmail="scicatingestor@your.site", + sourceFolderHost="s3.heartofgold.org", + endTime=datetime.isoformat(datetime.now()), + techniques=[], + numberOfFiles=0, + numberOfFilesArchived=0, + **ownable.dict() + ) + + sci_clie.upload_new_dataset(payload) + + +def test_get_dataset(): + + datasets = sci_clie.get_datasets({"ownerGroup": "ingestor"}) + + for dataset in datasets: + assert dataset["ownerGroup"] == "ingestor" + + +def test_update_dataset(): + sci_clie = ScicatClient(base_url=os.environ["BASE_URL"], + token=None, + username=os.environ["SCICAT_USER"], + password=os.environ["SCICAT_PASSWORD"]) + + datasets = sci_clie.get_datasets({}) + pid = datasets[0]["pid"] + payload = RawDataset( + size=142, + owner="slartibartfast", + ownerGroup="Magrateheans", + contactEmail="slartibartfast@magrathea.org", + creationLocation="magrathea", + creationTime=datetime.isoformat(datetime.now()), + instrumentId="earth", + proposalId="deepthought", + dataFormat="planet", + principalInvestigator="A. Mouse", + sourceFolder="/foo/bar", + scientificMetadata={"a": "field"}, + sampleId="gargleblaster", + accessGroups=["Vogons"] + ) + sci_clie.update_dataset(payload, pid)