Solutions for UTF-8 Validation and Missing Ranges

Garvit244 · Garvit244 · commit 48fc33dd204f · 2019-06-05T12:38:30.000+08:00
diff --git a/100-200q/163.py b/100-200q/163.py
@@ -0,0 +1,43 @@
+'''
+Given a sorted integer array whose elements all lie in the inclusive range [lower, upper], return its missing ranges.
+
+For example, given [0, 1, 3, 50, 75], lower = 0 and upper = 99, return ["2", "4->49", "51->74", "76->99"].
+'''
+
+class Solution(object):
+	def missingRange(self, A, lower, upper):
+		if not A:
+			return []
+
+		result = []
+		if A[0] != lower:
+			end = A[0] - 1
+			if end == lower:
+				m_r = str(lower)
+			else:
+				m_r = str(lower) + "->" + str(end)
+			result.append(m_r)
+
+		for index in range(1, len(A)):
+			if A[index] != A[index-1] + 1:
+				start = A[index-1] + 1
+				end = A[index] - 1
+				if start == end:
+					m_r = str(start)
+				else:
+					m_r = str(start) + "->" + str(end)
+				result.append(m_r)
+
+		if A[len(A) - 1] != upper:
+			start = A[len(A)-1] + 1
+			if start == upper:
+				m_r = str(start)
+			else:
+				m_r = str(start) + "->" + str(upper)
+			result.append(m_r)
+		return result
+
+solution = Solution()
+print solution.missingRange([0, 1, 3, 50, 75], 0, 99)
+print solution.missingRange([4, 10, 50, 98], 0, 99)
+print solution.missingRange([0], 0, 1)
diff --git a/300-400q/393.py b/300-400q/393.py
@@ -0,0 +1,66 @@
+'''
+A character in UTF8 can be from 1 to 4 bytes long, subjected to the following rules:
+
+For 1-byte character, the first bit is a 0, followed by its unicode code.
+For n-bytes character, the first n-bits are all one's, the n+1 bit is 0, followed by n-1 bytes with most significant 2 bits being 10.
+This is how the UTF-8 encoding would work:
+
+   Char. number range  |        UTF-8 octet sequence
+      (hexadecimal)    |              (binary)
+   --------------------+---------------------------------------------
+   0000 0000-0000 007F | 0xxxxxxx
+   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+Given an array of integers representing the data, return whether it is a valid utf-8 encoding.
+
+Note:
+The input is an array of integers. Only the least significant 8 bits of each integer is used to store the data. This means each integer represents only 1 byte of data.
+
+Example 1:
+
+data = [197, 130, 1], which represents the octet sequence: 11000101 10000010 00000001.
+
+Return true.
+It is a valid utf-8 encoding for a 2-bytes character followed by a 1-byte character.
+Example 2:
+
+data = [235, 140, 4], which represented the octet sequence: 11101011 10001100 00000100.
+
+Return false.
+The first 3 bits are all one's and the 4th bit is 0 means it is a 3-bytes character.
+The next byte is a continuation byte which starts with 10 and that's correct.
+But the second continuation byte does not start with 10, so it is invalid.
+'''
+
+class Solution(object):
+    def validUtf8(self, data):
+        """
+        :type data: List[int]
+        :rtype: bool
+        """
+        seveneth_mask = 1 << 7
+        sixth_mask = 1 << 6
+        no_bytes = 0
+        
+        if len(data) == 1:
+            return not(data[0] & seveneth_mask)
+        
+        for num in data:
+            if no_bytes == 0:
+                mask = 1 << 7
+                
+                while num & mask:
+                    no_bytes += 1
+                    mask >>= 1
+                    
+                if no_bytes == 0:
+                    continue
+                    
+                if no_bytes == 1 or no_bytes > 4:
+                    return False
+            else:
+                if not(num & seveneth_mask and not(num & sixth_mask)):
+                    return False
+            no_bytes -= 1
+        return no_bytes == 0
diff --git a/README.md b/README.md
@@ -104,6 +104,7 @@ Python solution of problems from [LeetCode](https://leetcode.com/).
 ##### [Problems 300-400](./300-400q/)
 | # | Title | Solution | Difficulty |
 |---| ----- | -------- | ---------- |
+|393|[UTF-8 Validation](https://leetcode.com/problems/utf-8-validation/)|[Python](./300-400q/393.py)|Medium|
 |388|[Longest Absolute File Path](https://leetcode.com/problems/longest-absolute-file-path)|[Python](./300-400q/388.py)|Medium|
 |387|[First Unique Character in a String](https://leetcode.com/problems/first-unique-character-in-a-string/) | [Python](./300-400q/387.py)|Easy|
 |380|[Insert Delete GetRandom O(1)](https://leetcode.com/problems/insert-delete-getrandom-o1/) | [Python](./300-400q/380.py)|Hard|
@@ -164,6 +165,7 @@ Python solution of problems from [LeetCode](https://leetcode.com/).
 |179|[Largest Number](https://leetcode.com/problems/largest-number/) | [Python](./100-200q/179.py)|Medium|
 |173|[Binary Search Tree Iterator](https://leetcode.com/problems/binary-search-tree-iterator)|[Python](./100-200q/173.py)|Medium|
 |170|[Two Sum III - Data structure design](https://leetcode.com/problems/two-sum-iii-data-structure-design)|[Python](./100-200q/170.py)|Easy|
+|163|[Missing Ranges](https://leetcode.com/problems/missing-ranges)|[Python](./100-200q/163.py)|Medium|
 |162|[Find Peak Element](https://leetcode.com/problems/find-peak-element/) | [Python](./100-200q/162.py)|Medium|
 |160|[Intersection of Two Linked Lists](https://leetcode.com/problems/intersection-of-two-linked-lists/) | [Python](./100-200q/160.py)|Easy|
 |159|[Longest Substring Which Contains 2 Unique Characters](https://leetcode.com/problems/longest-substring-with-at-most-two-distinct-characters)|[Python](./100-200q/159.py)|Hard|