-
Notifications
You must be signed in to change notification settings - Fork 189
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[C#] feat: Switch tokenizer to use the Microsoft.ML.Tokenizers library (
#1466) fixes #1467 ## Details Switch to use the Microsoft.ML.Tokenizers library for tokenizer support. #### Change details > Describe your changes, with screenshots and code snippets as appropriate **code snippets**: **screenshots**: ## Attestation Checklist - [x] My code follows the style guidelines of this project - I have checked for/fixed spelling, linting, and other errors - I have commented my code for clarity - I have made corresponding changes to the documentation (updating the doc strings in the code is sufficient) - My changes generate no new warnings - I have added tests that validate my changes and provide sufficient test coverage. I have tested with: - Local testing - E2E testing in Teams - New and existing unit tests pass locally with my changes
- Loading branch information
Showing
10 changed files
with
53 additions
and
33 deletions.
There are no files selected for viewing
26 changes: 26 additions & 0 deletions
26
dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI.Tests/UtilitiesTests/TokenizerTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
using Microsoft.Teams.AI.AI.Tokenizers; | ||
|
||
namespace Microsoft.Teams.AI.Tests.UtilitiesTests | ||
{ | ||
/// <summary>
/// Encode/decode checks for <see cref="ITokenizer"/> implementations.
/// </summary>
public class TokenizerTests
{
    /// <summary>
    /// Theory data source: yields one argument row per tokenizer under test,
    /// a <c>GPTTokenizer</c> built with no arguments and one built with "gpt-4".
    /// </summary>
    /// <returns>Rows that each contain a single tokenizer instance.</returns>
    public static IEnumerable<object[]> TokenizersObjects()
    {
        var defaultTokenizer = new GPTTokenizer();
        yield return new object[] { defaultTokenizer };

        var gpt4Tokenizer = new GPTTokenizer("gpt-4");
        yield return new object[] { gpt4Tokenizer };
    }

    /// <summary>
    /// Encodes a fixed sample string, checks the produced token ids against the
    /// expected sequence, and checks that decoding those ids returns the sample.
    /// </summary>
    /// <param name="tokenizer">Tokenizer supplied by <see cref="TokenizersObjects"/>.</param>
    [Theory]
    [MemberData(nameof(TokenizersObjects))]
    public void ValidateResults(ITokenizer tokenizer)
    {
        // Arrange
        string sample = "Hello, World";
        int[] expectedIds = new int[] { 9906, 11, 4435 };
        Assert.NotNull(tokenizer);

        // Act
        IReadOnlyList<int> encoded = tokenizer.Encode(sample);

        // Assert: exact ids first, then the round trip back to the input text.
        Assert.Equal(expectedIds, encoded);
        Assert.Equal(sample, tokenizer.Decode(encoded));
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <packageSources>
    <!-- Azure DevOps "dotnet-libraries" feed, listed in addition to nuget.org. -->
    <add key="dotnet-local-feed" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-libraries/nuget/v3/index.json" />
    <add key="nuget" value="https://api.nuget.org/v3/index.json" />
  </packageSources>
</configuration>