Skip to content

Commit 98db5f8

Browse files
committed
test(tgi): add granite tests
1 parent 429aeb5 commit 98db5f8

File tree

4 files changed

+16
-0
lines changed

4 files changed

+16
-0
lines changed

text-generation-inference/tests/fixtures/model.py

+4
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@
4141
"model_id": "Qwen/Qwen2.5-0.5B",
4242
"export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "fp16"},
4343
},
44+
"granite": {
45+
"model_id": "ibm-granite/granite-3.1-2b-instruct",
46+
"export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
47+
},
4448
}
4549

4650

text-generation-inference/tests/integration/test_generate.py

+7
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ async def test_model_single_request(tgi_service):
2525
"llama": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
2626
"mistral": "\nWhat is Deep Learning?\nDeep Learning is a type of machine learning that",
2727
"qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
28+
"granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
2829
}
2930
assert response.generated_text == greedy_expectations[service_name]
3031

@@ -50,6 +51,11 @@ async def test_model_single_request(tgi_service):
5051
"llama": "Deep Learning",
5152
"mistral": "Deep learning",
5253
"qwen2": "Deep Learning",
54+
"granite": "Deep Learning",
5359
}
5460
assert sample_expectations[service_name] in response
5561

@@ -84,6 +90,7 @@ async def test_model_multiple_requests(tgi_service, generate_load):
8490
"llama": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
8591
"mistral": "\nWhat is Deep Learning?\nDeep Learning is a type of machine learning that",
8692
"qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
93+
"granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
8794
}
8895
expected = expectations[tgi_service.client.service_name]
8996
for r in responses:

text-generation-inference/tests/server/test_decode.py

+2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def _test_decode(config_name, generator, do_sample):
4040
"llama": "George Orwell, 1984",
4141
"mistral": "The sky was",
4242
"qwen2": " A young woman with",
43+
"granite": "Aldous Huxley, Brave New World",
4344
}[config_name]
4445
assert expected_text in output.text
4546
else:
@@ -49,5 +50,6 @@ def _test_decode(config_name, generator, do_sample):
4950
"llama": " George Orwell’s classic dystopian novel, 1984, begins with this ominous sentence. The story",
5051
"mistral": "\nThe clocks were striking thirteen.\nThe clocks were striking thirteen.",
5152
"qwen2": " I was sitting in my room, staring at the ceiling, when the door opened and in came a",
53+
"granite": "\n\nThis opening line from George Orwell's dystopian novel \"198",
5254
}[config_name]
5355
assert output.text == expected_text

text-generation-inference/tests/server/test_prefill.py

+3
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,15 @@ def _test_prefill(config_name, generator, batch_size, do_sample):
3939
"llama": [10058, " George"],
4040
"mistral": [450, " The"],
4141
"qwen2": [362, " A"],
42+
"granite": [429, " -"],
4243
}[config_name]
4344
else:
4445
expectations = {
4546
"gpt2": [198, "\n"],
4647
"llama": [10058, " George"],
4748
"mistral": [13, "\n"],
4849
"qwen2": [358, " I"],
50+
"granite": [203, "\n"],
4951
}[config_name]
5052
for g in generations:
5153
tokens = g.tokens
@@ -80,6 +82,7 @@ def test_prefill_truncate(neuron_model_config):
8082
"llama": [" —", " The", " He", " He"],
8183
"mistral": [" He", "\n", " He", " He"],
8284
"qwen2": [" He", " The", " He", " He"],
85+
"granite": ["\n", "\n", " I", " He"],
8386
}[config_name]
8487
for i, g in enumerate(generations):
8588
tokens = g.tokens

0 commit comments

Comments (0)