Skip to content

Commit

Permalink
Force reading of title, artist and isrc as UTF-8 (#133)
Browse files Browse the repository at this point in the history
  • Loading branch information
devoxin authored Jun 10, 2024
1 parent 16f1e9f commit 8a91301
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -400,7 +401,7 @@ private void parseSegmentInfo(MatroskaElement infoElement) throws IOException {
} else if (child.is(MatroskaElementType.TimecodeScale)) {
timecodeScale = reader.asLong(child);
} else if (child.is(MatroskaElementType.Title) && title == null) {
title = reader.asString(child);
title = reader.asString(child, StandardCharsets.UTF_8);
}

reader.skip(child);
Expand Down Expand Up @@ -453,13 +454,16 @@ private void parseSimpleTag(MatroskaElement simpleTagElement) throws IOException
} else if (child.is(MatroskaElementType.TagString)) {
// https://www.matroska.org/technical/tagging.html
if ("title".equalsIgnoreCase(tagName) && title == null) {
title = reader.asString(child);
title = reader.asString(child, StandardCharsets.UTF_8);
} else if ("artist".equalsIgnoreCase(tagName)) {
artist = reader.asString(child);
artist = reader.asString(child, StandardCharsets.UTF_8);
} else if ("isrc".equalsIgnoreCase(tagName)) {
isrc = reader.asString(child);
// ISRC values should be plain ASCII, so forcing UTF-8 here is probably unnecessary (but harmless)
isrc = reader.asString(child, StandardCharsets.UTF_8);
}
}
}

reader.skip(child);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
Expand Down Expand Up @@ -153,16 +154,21 @@ public double asDouble(MatroskaElement element) throws IOException {
}
}

/**
* Reads the contents of the element as a string without forcing a charset.
* Delegates to {@link #asString(MatroskaElement, Charset)} with a null charset.
*
* @param element Element to read from
* @return The contents of the element as a string
* @throws IOException On read error
*/
public String asString(MatroskaElement element) throws IOException {
return asString(element, null);
}

/**
* @param element Element to read from
* @param forceCharset The charset to decode with, or null to use the element type's default (US-ASCII for STRING, UTF-8 for UTF8_STRING).
* @return The contents of the element as a string
* @throws IOException On read error
*/
public String asString(MatroskaElement element) throws IOException {
public String asString(MatroskaElement element, Charset forceCharset) throws IOException {
if (element.is(MatroskaElementType.DataType.STRING)) {
return new String(asBytes(element), StandardCharsets.US_ASCII);
return new String(asBytes(element), forceCharset != null ? forceCharset : StandardCharsets.US_ASCII);
} else if (element.is(MatroskaElementType.DataType.UTF8_STRING)) {
return new String(asBytes(element), StandardCharsets.UTF_8);
return new String(asBytes(element), forceCharset != null ? forceCharset : StandardCharsets.UTF_8);
} else {
throw new IllegalArgumentException("Not a string element.");
}
Expand Down

0 comments on commit 8a91301

Please sign in to comment.