|
| 1 | +/* |
| 2 | + * noVNC: HTML5 VNC client |
| 3 | + * Copyright (C) 2024 The noVNC Authors |
| 4 | + * Licensed under MPL 2.0 (see LICENSE.txt) |
| 5 | + * |
| 6 | + * See README.md for usage and integration instructions. |
| 7 | + * |
| 8 | + */ |
| 9 | + |
| 10 | +import * as Log from '../util/logging.js'; |
| 11 | + |
/**
 * Splits a byte buffer containing start-code delimited H.264 NAL units
 * (0x000001 or 0x00000001 prefixes) into chunks that each end with a
 * coded slice, and records the profile/constraint/level bytes of the
 * most recently seen sequence parameter set (SPS).
 */
export class H264Parser {
    /**
     * @param {Uint8Array} data - Buffer to parse. It must support
     *     `subarray()`, which parse() uses to return views into it.
     */
    constructor(data) {
        this._data = data;
        this._index = 0;
        // Filled in from the first three SPS payload bytes; null until a
        // complete SPS has been seen.
        this.profileIdc = null;
        this.constraintSet = null;
        this.levelIdc = null;
    }

    /**
     * Returns the length (3 or 4) of the start code located at `index`,
     * or 0 if there is no start code at that position. Reads past the
     * end of the buffer yield `undefined`, which never matches.
     */
    _getStartSequenceLen(index) {
        const data = this._data;
        // Both start code forms begin with two zero bytes.
        if (data[index] !== 0 || data[index + 1] !== 0) {
            return 0;
        }
        if (data[index + 2] === 1) {
            return 3;
        }
        if (data[index + 2] === 0 && data[index + 3] === 1) {
            return 4;
        }
        return 0;
    }

    /**
     * Returns the position of the first start code at or after `index`,
     * or -1 if the rest of the buffer contains none.
     */
    _indexOfNextNalUnit(index) {
        const data = this._data;
        for (let i = index; i < data.length; ++i) {
            if (this._getStartSequenceLen(i) !== 0) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Records the three bytes starting at `index` (the byte right after
     * the SPS NAL header) as profileIdc, constraintSet and levelIdc.
     *
     * A truncated SPS is ignored as a whole: storing only some of the
     * bytes would leave `undefined` in the other fields, and consumers
     * that format them (e.g. via `toString(16)`) would then crash.
     */
    _parseSps(index) {
        if (index + 2 >= this._data.length) {
            Log.Warn("Truncated SPS, ignoring profile information");
            return;
        }
        this.profileIdc = this._data[index];
        this.constraintSet = this._data[index + 1];
        this.levelIdc = this._data[index + 2];
    }

    /**
     * Inspects the NAL unit header byte at `index`.
     *
     * @returns {{slice?: boolean, key?: boolean}} `slice` is set for
     *     coded slices (types 1 and 5), `key` additionally for IDR
     *     slices (type 5). Every other unit type yields an empty object.
     * @throws {Error} If the top (forbidden zero) bit of the header is set.
     */
    _parseNalUnit(index) {
        const firstByte = this._data[index];
        if (firstByte & 0x80) {
            throw new Error('H264 parsing sanity check failed, forbidden zero bit is set');
        }
        // The unit type is the low five bits of the header byte.
        const unitType = firstByte & 0x1f;

        switch (unitType) {
            case 1: // coded slice, non-idr
                return { slice: true };
            case 5: // coded slice, idr
                return { slice: true, key: true };
            case 6: // sei
                return {};
            case 7: // sps
                this._parseSps(index + 1);
                return {};
            case 8: // pps
                return {};
            default:
                Log.Warn("Unhandled unit type: ", unitType);
                break;
        }
        return {};
    }

    /**
     * Consumes NAL units from the current position up to and including
     * the next coded slice, or to the end of the buffer if no slice
     * follows.
     *
     * @returns {{frame: Uint8Array, key: boolean}|null} A view of the
     *     consumed bytes and whether an IDR slice was among them, or
     *     null once the whole buffer has been consumed.
     * @throws {Error} If the current position is not at a start code.
     */
    parse() {
        const startIndex = this._index;
        let isKey = false;

        while (this._index < this._data.length) {
            const startSequenceLen = this._getStartSequenceLen(this._index);
            if (startSequenceLen === 0) {
                throw new Error('Invalid start sequence in bit stream');
            }

            const headerIndex = this._index + startSequenceLen;
            const { slice, key } = this._parseNalUnit(headerIndex);

            // The unit extends to the next start code, or to the end of
            // the buffer if this was the last unit.
            const nextIndex = this._indexOfNextNalUnit(headerIndex);
            this._index = nextIndex === -1 ? this._data.length : nextIndex;

            if (key) {
                isKey = true;
            }
            if (slice) {
                break;
            }
        }

        if (startIndex === this._index) {
            return null;
        }

        return {
            frame: this._data.subarray(startIndex, this._index),
            key: isKey,
        };
    }
}
| 111 | + |
/**
 * Decoding state for a single H.264 stream: owns a WebCodecs VideoDecoder,
 * remembers the most recent SPS parameters seen on this stream, and keeps
 * a FIFO of frames submitted to the decoder but not yet output.
 */
export class H264Context {
    /**
     * @param {number} width - Passed to the decoder as `codedWidth`.
     * @param {number} height - Passed to the decoder as `codedHeight`.
     */
    constructor(width, height) {
        // Not used by this class itself; presumably maintained by the
        // owner to track recency -- confirm against the caller.
        this.lastUsed = 0;
        this._width = width;
        this._height = height;
        this._profileIdc = null;
        this._constraintSet = null;
        this._levelIdc = null;
        this._decoder = null;
        this._pendingFrames = [];
    }

    /**
     * Decoder output callback. Matches the decoded frame with the oldest
     * pending entry, marks that entry ready and resolves its promise.
     * Frames whose entry is not flagged `keep` are closed right away.
     *
     * @throws {Error} If no entry is pending, or if the timestamps of the
     *     oldest pending entry and the decoded frame differ.
     */
    _handleFrame(frame) {
        const pending = this._pendingFrames.shift();
        if (pending === undefined) {
            throw new Error("Pending frame queue empty when receiving frame from decoder");
        }

        if (pending.timestamp !== frame.timestamp) {
            throw new Error("Video frame timestamp mismatch. Expected " +
                            pending.timestamp + " but got " + frame.timestamp);
        }

        pending.frame = frame;
        pending.ready = true;
        pending.resolve();

        if (!pending.keep) {
            frame.close();
        }
    }

    /**
     * Decoder error callback.
     *
     * @throws {Error} Always; wraps the message of the decoder error.
     */
    _handleError(e) {
        throw new Error("Failed to decode frame: " + e.message);
    }

    /**
     * Creates the VideoDecoder if there is none (or the previous one is
     * closed) and configures it with an `avc1.PPCCLL` codec string built
     * from the three SPS bytes, each as two hex digits.
     */
    _configureDecoder(profileIdc, constraintSet, levelIdc) {
        if (this._decoder === null || this._decoder.state === 'closed') {
            this._decoder = new VideoDecoder({
                output: frame => this._handleFrame(frame),
                error: e => this._handleError(e),
            });
        }
        const toHexByte = value => value.toString(16).padStart(2, '0');
        const codec = 'avc1.' +
                      toHexByte(profileIdc) +
                      toHexByte(constraintSet) +
                      toHexByte(levelIdc);
        this._decoder.configure({
            codec: codec,
            codedWidth: this._width,
            codedHeight: this._height,
            optimizeForLatency: true,
        });
    }

    /**
     * Appends a new entry to the pending queue and returns it. The entry's
     * `promise` is resolved by _handleFrame() once the frame is decoded.
     */
    _preparePendingFrame(timestamp) {
        const pending = {
            timestamp: timestamp,
            promise: null,
            resolve: null,
            frame: null,
            ready: false,
            keep: false,
        };
        pending.promise = new Promise((resolve) => {
            pending.resolve = resolve;
        });
        this._pendingFrames.push(pending);

        return pending;
    }

    /**
     * Splits `payload` into encoded frames and submits each of them to
     * the decoder, (re)configuring the decoder first when needed.
     *
     * @param {Uint8Array} payload - Start-code delimited H.264 data.
     * @returns {object|null} The pending entry of the last submitted
     *     frame, flagged `keep` so its decoded frame is not closed
     *     automatically, or null if nothing was submitted.
     */
    decode(payload) {
        const parser = new H264Parser(payload);
        let result = null;

        // Ideally, this timestamp should come from the server, but we'll just
        // approximate it instead.
        const timestamp = Math.round(window.performance.now() * 1e3);

        while (true) {
            const encodedFrame = parser.parse();
            if (encodedFrame === null) {
                break;
            }

            // SPS parameters are per-stream state and must live on this
            // context; keeping them on the global scope object would
            // share them between all contexts.
            if (parser.profileIdc !== null) {
                this._profileIdc = parser.profileIdc;
                this._constraintSet = parser.constraintSet;
                this._levelIdc = parser.levelIdc;
            }

            if (this._decoder === null || this._decoder.state !== 'configured') {
                if (!encodedFrame.key) {
                    Log.Warn("Missing key frame. Can't decode until one arrives");
                    continue;
                }
                if (this._profileIdc === null) {
                    Log.Warn('Cannot config decoder. Have not received SPS and PPS yet.');
                    continue;
                }
                this._configureDecoder(this._profileIdc, this._constraintSet,
                                       this._levelIdc);
            }

            result = this._preparePendingFrame(timestamp);

            const chunk = new EncodedVideoChunk({
                timestamp: timestamp,
                type: encodedFrame.key ? 'key' : 'delta',
                data: encodedFrame.frame,
            });

            try {
                this._decoder.decode(chunk);
            } catch (e) {
                // NOTE(review): the pending entry queued above stays in
                // the queue even though no output will arrive for this
                // chunk -- confirm whether it should be removed here.
                Log.Warn("Failed to decode:", e);
            }
        }

        // We only keep last frame of each payload
        if (result !== null) {
            result.keep = true;
        }

        return result;
    }
}
| 240 | + |
/**
 * Rectangle decoder for H.264 encoded updates. Keeps one H264Context per
 * distinct (x, y, width, height) rectangle and hands each payload to the
 * context belonging to its rectangle.
 */
export default class H264Decoder {
    constructor() {
        // Monotonic counter stamped on a context each time it is used.
        this._tick = 0;
        // Contexts keyed by the string produced by _contextId().
        this._contexts = {};
    }

    /** Builds the lookup key for a rectangle: "x,y,width,height". */
    _contextId(x, y, width, height) {
        const parts = [x, y, width, height];
        return parts.join(',');
    }

    /**
     * Returns the key of the context with the smallest `lastUsed` value,
     * or undefined if there are no contexts.
     */
    _findOldestContextId() {
        let candidate = undefined;
        let lowestTick = Number.MAX_VALUE;
        for (const id of Object.keys(this._contexts)) {
            const { lastUsed } = this._contexts[id];
            if (lastUsed < lowestTick) {
                lowestTick = lastUsed;
                candidate = id;
            }
        }
        return candidate;
    }

    /**
     * Creates and registers a context for the given rectangle. When the
     * limit of 64 contexts has been reached, the least recently used one
     * is dropped first.
     */
    _createContext(x, y, width, height) {
        const contextLimit = 64;
        const inUse = Object.keys(this._contexts).length;
        if (inUse >= contextLimit) {
            delete this._contexts[this._findOldestContextId()];
        }
        const created = new H264Context(width, height);
        this._contexts[this._contextId(x, y, width, height)] = created;
        return created;
    }

    /** Returns the context for the rectangle, creating it on first use. */
    _getContext(x, y, width, height) {
        const existing = this._contexts[this._contextId(x, y, width, height)];
        if (existing !== undefined) {
            return existing;
        }
        return this._createContext(x, y, width, height);
    }

    /** Forgets the context of a single rectangle, if there is one. */
    _resetContext(x, y, width, height) {
        const id = this._contextId(x, y, width, height);
        delete this._contexts[id];
    }

    /** Forgets every context. */
    _resetAllContexts() {
        this._contexts = {};
    }

    /**
     * Reads one H.264 rectangle from the socket: a 32-bit payload length,
     * a 32-bit flags word, then the payload itself.
     *
     * Flag bit 2 resets all contexts; otherwise flag bit 1 resets only
     * the context of this rectangle. A non-empty payload is decoded and,
     * if decoding yields a frame, passed to `display.videoFrame()`.
     *
     * @returns {boolean} false if the socket does not yet hold the whole
     *     message, true once the rectangle has been consumed.
     */
    decodeRect(x, y, width, height, sock, display, depth) {
        const RESET_CONTEXT = 1;
        const RESET_ALL_CONTEXTS = 2;

        if (sock.rQwait("h264 header", 8)) {
            return false;
        }

        const length = sock.rQshift32();
        const flags = sock.rQshift32();

        // The 8 header bytes were already consumed above, hence the
        // third argument.
        if (sock.rQwait("h264 payload", length, 8)) {
            return false;
        }

        if ((flags & RESET_ALL_CONTEXTS) !== 0) {
            this._resetAllContexts();
        } else if ((flags & RESET_CONTEXT) !== 0) {
            this._resetContext(x, y, width, height);
        }

        const context = this._getContext(x, y, width, height);
        context.lastUsed = this._tick++;

        if (length === 0) {
            return true;
        }

        const payload = sock.rQshiftBytes(length, false);
        const frame = context.decode(payload);
        if (frame !== null) {
            display.videoFrame(x, y, width, height, frame);
        }

        return true;
    }
}
0 commit comments