Merge pull request #126 from tgisaturday/master

Add SPICE from coco-caption
Maluuba · Sep 20, 2023 · commit fa06383
2 parents: 7b05b61 + dc53c98

Showing 25 changed files with 142 additions and 5 deletions.
diff --git a/LICENSE.md b/LICENSE.md
@@ -43,3 +43,18 @@ Redistribution and use in source and binary forms, with or without modification,
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 The views and conclusions contained in the software and documentation are those of the authors and should not be interpreted as representing official policies, either expressed or implied, of the FreeBSD Project.
+
+### Semantic Propositional Image Caption Evaluation (SPICE)
+From the paper "SPICE: Semantic Propositional Image Caption Evaluation" ECCV 2016.
+Copyright (c) 2016 Peter Anderson and Basura Fernando and Mark Johnson and Stephen Gould
+https://github.com/peteanderson80/SPICE
+
+Licensed under the GNU Affero General Public License v3.0 (the "License"); you may not use this file except in compliance with the License.
+
+You may obtain a copy of the License at
+
+       https://www.gnu.org/licenses/agpl-3.0.html
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+See the License for the specific language governing permissions and limitations under the License.
diff --git a/bin/nlg-eval b/bin/nlg-eval
@@ -10,7 +10,7 @@ import stat
 import sys
 import time
 from zipfile import ZipFile
-
+import shutil
 import click
 from xdg import XDG_CONFIG_HOME, XDG_CACHE_HOME
 
@@ -111,6 +111,10 @@ def setup(data_path):
         url='http://www.cs.toronto.edu/~rkiros/models/dictionary.txt',
         target_dir=data_path
     ))
+    downloads.append(dict(
+            url='http://nlp.stanford.edu/software/stanford-corenlp-full-2015-12-09.zip',
+            target_dir=CODE_PATH
+    ))   
     downloads.append(dict(
         url='http://www.cs.toronto.edu/~rkiros/models/utable.npy',
         target_dir=data_path
@@ -165,6 +169,14 @@ def setup(data_path):
             if os.path.exists(p):
                 os.remove(p)
 
+    with ZipFile(os.path.join(CODE_PATH, 'stanford-corenlp-full-2015-12-09.zip')) as z:
+        z.extractall(CODE_PATH)
+    temp_path = os.path.join(CODE_PATH,'stanford-corenlp-full-2015-12-09')
+    os.makedirs(os.path.join(CODE_PATH,'pycocoevalcap/spice/lib/'), exist_ok=True)
+    shutil.move(os.path.join(temp_path,'stanford-corenlp-3.6.0.jar'), os.path.join(CODE_PATH,'pycocoevalcap/spice/lib/stanford-corenlp-3.6.0.jar'))
+    shutil.move(os.path.join(temp_path,'stanford-corenlp-3.6.0-models.jar'),os.path.join(CODE_PATH,'pycocoevalcap/spice/lib/stanford-corenlp-3.6.0-models.jar')) 
+    os.remove(os.path.join(CODE_PATH, 'stanford-corenlp-full-2015-12-09.zip'))
+
     path = os.path.join(CODE_PATH, 'multibleu/multi-bleu.perl')
     stats = os.stat(path)
     os.chmod(path, stats.st_mode | stat.S_IEXEC)

diff --git a/nlgeval/.DS_Store b/nlgeval/.DS_Store
diff --git a/nlgeval/__init__.py b/nlgeval/__init__.py
@@ -9,7 +9,7 @@
 from nlgeval.pycocoevalcap.cider.cider import Cider
 from nlgeval.pycocoevalcap.meteor.meteor import Meteor
 from nlgeval.pycocoevalcap.rouge.rouge import Rouge
-
+from nlgeval.pycocoevalcap.spice.spice import Spice
 
 # str/unicode stripping in Python 2 and 3 instead of `str.strip`.
 def _strip(s):
@@ -34,7 +34,8 @@ def compute_metrics(hypothesis, references, no_overlap=False, no_skipthoughts=Fa
             (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
             (Meteor(), "METEOR"),
             (Rouge(), "ROUGE_L"),
-            (Cider(), "CIDEr")
+            (Cider(), "CIDEr"),
+            (Spice(), "SPICE")
         ]
         for scorer, method in scorers:
             score, scores = scorer.compute_score(refs, hyps)
@@ -101,7 +102,8 @@ def compute_individual_metrics(ref, hyp, no_overlap=False, no_skipthoughts=False
             (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
             (Meteor(), "METEOR"),
             (Rouge(), "ROUGE_L"),
-            (Cider(), "CIDEr")
+            (Cider(), "CIDEr"),
+            (Spice(), "SPICE")            
         ]
         for scorer, method in scorers:
             score, scores = scorer.compute_score(refs, hyps)
@@ -158,6 +160,7 @@ class NLGEval(object):
                         'METEOR',
                         'ROUGE_L',
                         'CIDEr',
+                        'SPICE',
 
                         # Skip-thought
                         'SkipThoughtCS',
@@ -224,7 +227,8 @@ def load_scorers(self):
             self.scorers.append((Rouge(), "ROUGE_L"))
         if 'CIDEr' not in self.metrics_to_omit:
             self.scorers.append((Cider(), "CIDEr"))
-
+        if 'SPICE' not in self.metrics_to_omit:
+            self.scorers.append((Spice(), "SPICE"))
 
     def load_skipthought_model(self):
         from nlgeval.skipthoughts import skipthoughts

diff --git a/nlgeval/pycocoevalcap/.DS_Store b/nlgeval/pycocoevalcap/.DS_Store
diff --git a/nlgeval/pycocoevalcap/spice/__init__.py b/nlgeval/pycocoevalcap/spice/__init__.py
diff --git a/nlgeval/pycocoevalcap/spice/lib/Meteor-1.5.jar b/nlgeval/pycocoevalcap/spice/lib/Meteor-1.5.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/SceneGraphParser-1.0.jar b/nlgeval/pycocoevalcap/spice/lib/SceneGraphParser-1.0.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/ejml-0.23.jar b/nlgeval/pycocoevalcap/spice/lib/ejml-0.23.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/fst-2.47.jar b/nlgeval/pycocoevalcap/spice/lib/fst-2.47.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/guava-19.0.jar b/nlgeval/pycocoevalcap/spice/lib/guava-19.0.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/hamcrest-core-1.3.jar b/nlgeval/pycocoevalcap/spice/lib/hamcrest-core-1.3.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/jackson-core-2.5.3.jar b/nlgeval/pycocoevalcap/spice/lib/jackson-core-2.5.3.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/javassist-3.19.0-GA.jar b/nlgeval/pycocoevalcap/spice/lib/javassist-3.19.0-GA.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/json-simple-1.1.1.jar b/nlgeval/pycocoevalcap/spice/lib/json-simple-1.1.1.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/junit-4.12.jar b/nlgeval/pycocoevalcap/spice/lib/junit-4.12.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/lmdbjni-0.4.6.jar b/nlgeval/pycocoevalcap/spice/lib/lmdbjni-0.4.6.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/lmdbjni-linux64-0.4.6.jar b/nlgeval/pycocoevalcap/spice/lib/lmdbjni-linux64-0.4.6.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/lmdbjni-osx64-0.4.6.jar b/nlgeval/pycocoevalcap/spice/lib/lmdbjni-osx64-0.4.6.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/lmdbjni-win64-0.4.6.jar b/nlgeval/pycocoevalcap/spice/lib/lmdbjni-win64-0.4.6.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/objenesis-2.4.jar b/nlgeval/pycocoevalcap/spice/lib/objenesis-2.4.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/slf4j-api-1.7.12.jar b/nlgeval/pycocoevalcap/spice/lib/slf4j-api-1.7.12.jar
diff --git a/nlgeval/pycocoevalcap/spice/lib/slf4j-simple-1.7.21.jar b/nlgeval/pycocoevalcap/spice/lib/slf4j-simple-1.7.21.jar
diff --git a/nlgeval/pycocoevalcap/spice/spice-1.0.jar b/nlgeval/pycocoevalcap/spice/spice-1.0.jar
diff --git a/nlgeval/pycocoevalcap/spice/spice.py b/nlgeval/pycocoevalcap/spice/spice.py
@@ -0,0 +1,106 @@
+from __future__ import division
+import os
+import sys
+import subprocess
+import threading
+import json
+import numpy as np
+import ast
+import tempfile
+
+# Assumes spice-1.0.jar (SPICE_JAR) is in the same directory as spice.py.  Change as needed.
+SPICE_JAR = 'spice-1.0.jar'
+TEMP_DIR = 'tmp'
+CACHE_DIR = 'cache'
+
+
+def enc(s):
+    return s.encode('utf-8')
+
+
+def dec(s):
+    return s.decode('utf-8')
+
+
+
+class Spice:
+    """
+    Main Class to compute the SPICE metric 
+    """
+
+    def float_convert(self, obj):
+        try:
+          return float(obj)
+        except:
+          return np.nan
+
+    def compute_score(self, gts, res):
+        assert(sorted(gts.keys()) == sorted(res.keys()))
+        imgIds = sorted(gts.keys())
+
+        # Prepare temp input file for the SPICE scorer
+        input_data = []
+        for id in imgIds:
+            hypo = res[id]
+            ref = gts[id]
+
+            # Sanity check.
+            assert(type(hypo) is list)
+            assert(len(hypo) == 1)
+            assert(type(ref) is list)
+            assert(len(ref) >= 1)
+
+            input_data.append({
+              "image_id" : id,
+              "test" : hypo[0],
+              "refs" : ref
+            })
+
+        cwd = os.path.dirname(os.path.abspath(__file__))
+        temp_dir=os.path.join(cwd, TEMP_DIR)
+        if not os.path.exists(temp_dir):
+          os.makedirs(temp_dir)
+        in_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir, mode = "w")
+        json.dump(input_data, in_file, indent=2)
+        in_file.close()
+
+        # Start job
+        out_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
+        out_file.close()
+        cache_dir=os.path.join(cwd, CACHE_DIR)
+        if not os.path.exists(cache_dir):
+          os.makedirs(cache_dir)
+        spice_cmd = ['java', '-jar', '-Xmx8G', SPICE_JAR, in_file.name,
+          '-cache', cache_dir,
+          '-out', out_file.name,
+          '-subset',
+          '-silent'
+        ]
+        subprocess.check_call(spice_cmd, 
+            cwd=os.path.dirname(os.path.abspath(__file__)))
+
+        # Read and process results
+        with open(out_file.name) as data_file:    
+          results = json.load(data_file)
+        os.remove(in_file.name)
+        os.remove(out_file.name)
+
+        imgId_to_scores = {}
+        spice_scores = []
+        for item in results:
+          imgId_to_scores[item['image_id']] = item['scores']
+          spice_scores.append(self.float_convert(item['scores']['All']['f']))
+        average_score = np.mean(np.array(spice_scores))
+        scores = []
+        for image_id in imgIds:
+          # Convert none to NaN before saving scores over subcategories
+          score_set = {}
+          for category,score_tuple in imgId_to_scores[image_id].items():
+            score_set[category] = {k: self.float_convert(v) for k, v in score_tuple.items()}
+          scores.append(score_set)
+        return average_score, scores
+
+    def method(self):
+        return "SPICE"
+
+