Skip to content

Commit

Permalink
feat: auto-detect encoding see #3
Browse files Browse the repository at this point in the history
  • Loading branch information
tympanix committed Nov 10, 2018
1 parent 088c67f commit 39170d5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
10 changes: 9 additions & 1 deletion subsync/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pysrt import SubRipTime
import string
import random
import chardet
import re
from datetime import timedelta

Expand Down Expand Up @@ -103,7 +104,7 @@ class Subtitle:
def __init__(self, media, path):
self.media = media
self.path = path
self.subs = pysrt.open(self.path, encoding='utf-8')
self.subs = pysrt.open(self.path, encoding=self._find_encoding())

def labels(self, subs=None):
if self.media.mfcc is None:
Expand All @@ -119,6 +120,13 @@ def labels(self, subs=None):

return labels

def _find_encoding(self):
    """Guess the character encoding of the subtitle file at ``self.path``.

    The file is opened in binary mode, read in full, and the raw bytes
    are passed to ``chardet.detect``.

    Returns:
        The ``"encoding"`` entry of the detection result, as looked up
        with ``.get`` (so a missing entry is not an error here).
    """
    with open(self.path, "rb") as handle:
        raw = handle.read()
    return chardet.detect(raw).get("encoding")


def offset(self):
d = self.media.offset
Expand Down
2 changes: 1 addition & 1 deletion subsync/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.4'
__version__ = '0.1.5'

0 comments on commit 39170d5

Please sign in to comment.