Home Reference Source

src/utils/mp4-tools.ts

  1. import { sliceUint8 } from './typed-array';
  2. import { ElementaryStreamTypes } from '../loader/fragment';
  3. import { PassthroughTrack, UserdataSample } from '../types/demuxer';
  4. import { utf8ArrayToStr } from '../demux/id3';
  5.  
// Largest value representable in an unsigned 32-bit integer (2^32 - 1).
const UINT32_MAX = Math.pow(2, 32) - 1;
// Cached Array.prototype.push, used to append many items at once via apply().
const push = [].push;
  8.  
// We are using fixed track IDs for driving the MP4 remuxer
// instead of following the TS PIDs.
// There is no reason not to do this and some browsers/SourceBuffer-demuxers
// may not like if there are TrackID "switches"
// See https://github.com/video-dev/hls.js/issues/1331
// Here we are mapping our internal track types to constant MP4 track IDs
// With MSE currently one can only have one track of each, and we are muxing
// whatever video/audio rendition in them.
// Track type -> fixed MP4 track ID.
export const RemuxerTrackIdConfig = {
  video: 1,
  audio: 2,
  id3: 3,
  text: 4,
};
  23.  
  24. export function bin2str(data: Uint8Array): string {
  25. return String.fromCharCode.apply(null, data);
  26. }
  27.  
  28. export function readUint16(buffer: Uint8Array, offset: number): number {
  29. const val = (buffer[offset] << 8) | buffer[offset + 1];
  30. return val < 0 ? 65536 + val : val;
  31. }
  32.  
  33. export function readUint32(buffer: Uint8Array, offset: number): number {
  34. const val = readSint32(buffer, offset);
  35. return val < 0 ? 4294967296 + val : val;
  36. }
  37.  
  38. export function readSint32(buffer: Uint8Array, offset: number): number {
  39. return (
  40. (buffer[offset] << 24) |
  41. (buffer[offset + 1] << 16) |
  42. (buffer[offset + 2] << 8) |
  43. buffer[offset + 3]
  44. );
  45. }
  46.  
  47. export function writeUint32(buffer: Uint8Array, offset: number, value: number) {
  48. buffer[offset] = value >> 24;
  49. buffer[offset + 1] = (value >> 16) & 0xff;
  50. buffer[offset + 2] = (value >> 8) & 0xff;
  51. buffer[offset + 3] = value & 0xff;
  52. }
  53.  
  54. // Find the data for a box specified by its path
  55. export function findBox(data: Uint8Array, path: string[]): Uint8Array[] {
  56. const results = [] as Uint8Array[];
  57. if (!path.length) {
  58. // short-circuit the search for empty paths
  59. return results;
  60. }
  61. const end = data.byteLength;
  62.  
  63. for (let i = 0; i < end; ) {
  64. const size = readUint32(data, i);
  65. const type = bin2str(data.subarray(i + 4, i + 8));
  66. const endbox = size > 1 ? i + size : end;
  67.  
  68. if (type === path[0]) {
  69. if (path.length === 1) {
  70. // this is the end of the path and we've found the box we were
  71. // looking for
  72. results.push(data.subarray(i + 8, endbox));
  73. } else {
  74. // recursively search for the next box along the path
  75. const subresults = findBox(data.subarray(i + 8, endbox), path.slice(1));
  76. if (subresults.length) {
  77. push.apply(results, subresults);
  78. }
  79. }
  80. }
  81. i = endbox;
  82. }
  83.  
  84. // we've finished searching all of data
  85. return results;
  86. }
  87.  
// Parsed subset of an ISOBMFF 'sidx' (segment index) box.
type SidxInfo = {
  earliestPresentationTime: number; // currently always 0 (not parsed — see TODO in parseSegmentIndex)
  timescale: number; // ticks per second for the subsegment durations
  version: number; // sidx full-box version (0 or 1)
  referencesCount: number; // number of subsegment references
  references: any[]; // per-subsegment size/duration and derived byte range
  moovEndOffset: number | null; // length of the moov box payload, if a moov was found
};
  96.  
/**
 * Parse the 'sidx' box of an init segment into subsegment references
 * (sizes, durations and derived byte ranges). Returns null when there is
 * no sidx or it uses unsupported hierarchical references.
 */
export function parseSegmentIndex(initSegment: Uint8Array): SidxInfo | null {
  const moovBox = findBox(initSegment, ['moov']);
  const moov = moovBox[0];
  const moovEndOffset = moov ? moov.length : null; // we need this in case we need to chop off garbage at the end of current data

  const sidxBox = findBox(initSegment, ['sidx']);

  // findBox always returns an array; the !sidxBox check is defensive
  if (!sidxBox || !sidxBox[0]) {
    return null;
  }

  const references: any[] = [];
  const sidx = sidxBox[0];

  // full-box version byte: 0 => 32-bit time fields, 1 => 64-bit
  const version = sidx[0];

  // set initial offset, we skip the reference ID (not needed)
  let index = version === 0 ? 8 : 16;

  const timescale = readUint32(sidx, index);
  index += 4;

  // TODO: parse earliestPresentationTime and firstOffset
  // usually zero in our case
  const earliestPresentationTime = 0;
  const firstOffset = 0;

  // skip earliest_presentation_time and first_offset
  // (4 bytes each for version 0, 8 bytes each for version 1)
  if (version === 0) {
    index += 8;
  } else {
    index += 16;
  }

  // skip reserved
  index += 2;

  // referenced byte ranges start right after the sidx box itself
  let startByte = sidx.length + firstOffset;

  const referencesCount = readUint16(sidx, index);
  index += 2;

  for (let i = 0; i < referencesCount; i++) {
    let referenceIndex = index;

    // top bit: reference_type; low 31 bits: referenced_size
    const referenceInfo = readUint32(sidx, referenceIndex);
    referenceIndex += 4;

    const referenceSize = referenceInfo & 0x7fffffff;
    const referenceType = (referenceInfo & 0x80000000) >>> 31;

    if (referenceType === 1) {
      // reference_type 1 points at another sidx box, not at media data
      // eslint-disable-next-line no-console
      console.warn('SIDX has hierarchical references (not supported)');
      return null;
    }

    const subsegmentDuration = readUint32(sidx, referenceIndex);
    referenceIndex += 4;

    references.push({
      referenceSize,
      subsegmentDuration, // unscaled
      info: {
        duration: subsegmentDuration / timescale,
        start: startByte,
        end: startByte + referenceSize - 1,
      },
    });

    startByte += referenceSize;

    // Skipping 1 bit for |startsWithSap|, 3 bits for |sapType|, and 28 bits
    // for |sapDelta|.
    referenceIndex += 4;

    // skip to next ref
    index = referenceIndex;
  }

  return {
    earliestPresentationTime,
    timescale,
    version,
    referencesCount,
    references,
    moovEndOffset,
  };
}
  185.  
  186. /**
  187. * Parses an MP4 initialization segment and extracts stream type and
  188. * timescale values for any declared tracks. Timescale values indicate the
  189. * number of clock ticks per second to assume for time-based values
  190. * elsewhere in the MP4.
  191. *
  192. * To determine the start time of an MP4, you need two pieces of
  193. * information: the timescale unit and the earliest base media decode
  194. * time. Multiple timescales can be specified within an MP4 but the
  195. * base media decode time is always expressed in the timescale from
  196. * the media header box for the track:
  197. * ```
  198. * moov > trak > mdia > mdhd.timescale
  199. * moov > trak > mdia > hdlr
  200. * ```
  201. * @param initSegment {Uint8Array} the bytes of the init segment
 * @return {InitData} a hash of track info (timescale, type, id, codec) keyed
 * by track id and track type; the array is empty (never null) when no
 * tracks can be parsed.
  204. */
  205.  
// Per-track details stored under the named (audio/video/caption) keys of InitData.
export interface InitDataTrack {
  timescale: number;
  id: number;
  codec: string;
}

// Handler types recognized from the 'hdlr' box ('soun' / 'vide').
type HdlrType = ElementaryStreamTypes.AUDIO | ElementaryStreamTypes.VIDEO;

// Result of parseInitSegment: a sparse array indexed by numeric track ID,
// with additional named properties per track type.
export interface InitData extends Array<any> {
  [index: number]:
    | {
        timescale: number;
        type: HdlrType;
        default?: {
          // populated from the trex box when present:
          duration: number; // default_sample_duration
          flags: number; // default_sample_flags
        };
      }
    | undefined;
  audio?: InitDataTrack;
  video?: InitDataTrack;
  caption?: InitDataTrack;
}
  229.  
  230. export function parseInitSegment(initSegment: Uint8Array): InitData {
  231. const result: InitData = [];
  232. const traks = findBox(initSegment, ['moov', 'trak']);
  233. for (let i = 0; i < traks.length; i++) {
  234. const trak = traks[i];
  235. const tkhd = findBox(trak, ['tkhd'])[0];
  236. if (tkhd) {
  237. let version = tkhd[0];
  238. let index = version === 0 ? 12 : 20;
  239. const trackId = readUint32(tkhd, index);
  240. const mdhd = findBox(trak, ['mdia', 'mdhd'])[0];
  241. if (mdhd) {
  242. version = mdhd[0];
  243. index = version === 0 ? 12 : 20;
  244. const timescale = readUint32(mdhd, index);
  245. const hdlr = findBox(trak, ['mdia', 'hdlr'])[0];
  246. if (hdlr) {
  247. const hdlrType = bin2str(hdlr.subarray(8, 12));
  248. const type: HdlrType | undefined = {
  249. soun: ElementaryStreamTypes.AUDIO as const,
  250. vide: ElementaryStreamTypes.VIDEO as const,
  251. }[hdlrType];
  252. if (type) {
  253. // Parse codec details
  254. const stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0];
  255. let codec;
  256. if (stsd) {
  257. codec = bin2str(stsd.subarray(12, 16));
  258. // TODO: Parse codec details to be able to build MIME type.
  259. // stsd.start += 8;
  260. // const codecBox = findBox(stsd, [codec])[0];
  261. // if (codecBox) {
  262. // TODO: Codec parsing support for avc1, mp4a, hevc, av01...
  263. // }
  264. }
  265. result[trackId] = { timescale, type };
  266. result[type] = { timescale, id: trackId, codec };
  267. }
  268. }
  269. }
  270. }
  271. }
  272.  
  273. const trex = findBox(initSegment, ['moov', 'mvex', 'trex']);
  274. trex.forEach((trex) => {
  275. const trackId = readUint32(trex, 4);
  276. const track = result[trackId];
  277. if (track) {
  278. track.default = {
  279. duration: readUint32(trex, 12),
  280. flags: readUint32(trex, 20),
  281. };
  282. }
  283. });
  284.  
  285. return result;
  286. }
  287.  
/**
 * Determine the base media decode start time, in seconds, for an MP4
 * fragment. If multiple fragments are specified, the earliest time is
 * returned.
 *
 * The base media decode time can be parsed from track fragment
 * metadata:
 * ```
 * moof > traf > tfdt.baseMediaDecodeTime
 * ```
 * It requires the timescale value from the mdhd to interpret.
 *
 * @param initData {InitData} a hash of track type to timescale values
 * @param fmp4 {Uint8Array} the bytes of the mp4 fragment
 * @return {number} the earliest base media decode start time for the
 * fragment, in seconds
 */
export function getStartDTS(initData: InitData, fmp4: Uint8Array): number {
  // we need info from two children of each track fragment box
  return (
    findBox(fmp4, ['moof', 'traf']).reduce((result: number | null, traf) => {
      // NOTE(review): assumes every traf contains a tfdt; indexing tfdt[0]
      // would throw on a traf without one — confirm inputs guarantee this.
      const tfdt = findBox(traf, ['tfdt'])[0];
      const version = tfdt[0];
      const start = findBox(traf, ['tfhd']).reduce(
        (result: number | null, tfhd) => {
          // get the track id from the tfhd
          const id = readUint32(tfhd, 4);
          const track = initData[id];
          if (track) {
            // version 1 stores a 64-bit time in two 32-bit words
            let baseTime = readUint32(tfdt, 4);
            if (version === 1) {
              baseTime *= Math.pow(2, 32);
              baseTime += readUint32(tfdt, 8);
            }
            // assume a 90kHz clock if no timescale was specified
            const scale = track.timescale || 90e3;
            // convert base time to seconds
            const startTime = baseTime / scale;
            if (
              isFinite(startTime) &&
              (result === null || startTime < result)
            ) {
              return startTime;
            }
          }
          return result;
        },
        null
      );
      // keep the earliest finite start time across all track fragments
      if (
        start !== null &&
        isFinite(start) &&
        (result === null || start < result)
      ) {
        return start;
      }
      return result;
    }, null) || 0
  );
}
  348.  
  349. /*
  350. For Reference:
  351. aligned(8) class TrackFragmentHeaderBox
  352. extends FullBox(‘tfhd’, 0, tf_flags){
  353. unsigned int(32) track_ID;
  354. // all the following are optional fields
  355. unsigned int(64) base_data_offset;
  356. unsigned int(32) sample_description_index;
  357. unsigned int(32) default_sample_duration;
  358. unsigned int(32) default_sample_size;
  359. unsigned int(32) default_sample_flags
  360. }
  361. */
  362. export function getDuration(data: Uint8Array, initData: InitData) {
  363. let rawDuration = 0;
  364. let videoDuration = 0;
  365. let audioDuration = 0;
  366. const trafs = findBox(data, ['moof', 'traf']);
  367. for (let i = 0; i < trafs.length; i++) {
  368. const traf = trafs[i];
  369. // There is only one tfhd & trun per traf
  370. // This is true for CMAF style content, and we should perhaps check the ftyp
  371. // and only look for a single trun then, but for ISOBMFF we should check
  372. // for multiple track runs.
  373. const tfhd = findBox(traf, ['tfhd'])[0];
  374. // get the track id from the tfhd
  375. const id = readUint32(tfhd, 4);
  376. const track = initData[id];
  377. if (!track) {
  378. continue;
  379. }
  380. const trackDefault = track.default;
  381. const tfhdFlags = readUint32(tfhd, 0) | trackDefault?.flags!;
  382. let sampleDuration: number | undefined = trackDefault?.duration;
  383. if (tfhdFlags & 0x000008) {
  384. // 0x000008 indicates the presence of the default_sample_duration field
  385. if (tfhdFlags & 0x000002) {
  386. // 0x000002 indicates the presence of the sample_description_index field, which precedes default_sample_duration
  387. // If present, the default_sample_duration exists at byte offset 12
  388. sampleDuration = readUint32(tfhd, 12);
  389. } else {
  390. // Otherwise, the duration is at byte offset 8
  391. sampleDuration = readUint32(tfhd, 8);
  392. }
  393. }
  394. // assume a 90kHz clock if no timescale was specified
  395. const timescale = track.timescale || 90e3;
  396. const truns = findBox(traf, ['trun']);
  397. for (let j = 0; j < truns.length; j++) {
  398. rawDuration = computeRawDurationFromSamples(truns[j]);
  399. if (!rawDuration && sampleDuration) {
  400. const sampleCount = readUint32(truns[j], 4);
  401. rawDuration = sampleDuration * sampleCount;
  402. }
  403. if (track.type === ElementaryStreamTypes.VIDEO) {
  404. videoDuration += rawDuration / timescale;
  405. } else if (track.type === ElementaryStreamTypes.AUDIO) {
  406. audioDuration += rawDuration / timescale;
  407. }
  408. }
  409. }
  410. if (videoDuration === 0 && audioDuration === 0) {
  411. // If duration samples are not available in the traf use sidx subsegment_duration
  412. const sidx = parseSegmentIndex(data);
  413. if (sidx?.references) {
  414. return sidx.references.reduce(
  415. (dur, ref) => dur + ref.info.duration || 0,
  416. 0
  417. );
  418. }
  419. }
  420. if (videoDuration) {
  421. return videoDuration;
  422. }
  423. return audioDuration;
  424. }
  425.  
  426. /*
  427. For Reference:
  428. aligned(8) class TrackRunBox
  429. extends FullBox(‘trun’, version, tr_flags) {
  430. unsigned int(32) sample_count;
  431. // the following are optional fields
  432. signed int(32) data_offset;
  433. unsigned int(32) first_sample_flags;
  434. // all fields in the following array are optional
  435. {
  436. unsigned int(32) sample_duration;
  437. unsigned int(32) sample_size;
  438. unsigned int(32) sample_flags
  439. if (version == 0)
  440. { unsigned int(32)
  441. else
  442. { signed int(32)
  443. }[ sample_count ]
  444. }
  445. */
  446. export function computeRawDurationFromSamples(trun): number {
  447. const flags = readUint32(trun, 0);
  448. // Flags are at offset 0, non-optional sample_count is at offset 4. Therefore we start 8 bytes in.
  449. // Each field is an int32, which is 4 bytes
  450. let offset = 8;
  451. // data-offset-present flag
  452. if (flags & 0x000001) {
  453. offset += 4;
  454. }
  455. // first-sample-flags-present flag
  456. if (flags & 0x000004) {
  457. offset += 4;
  458. }
  459.  
  460. let duration = 0;
  461. const sampleCount = readUint32(trun, 4);
  462. for (let i = 0; i < sampleCount; i++) {
  463. // sample-duration-present flag
  464. if (flags & 0x000100) {
  465. const sampleDuration = readUint32(trun, offset);
  466. duration += sampleDuration;
  467. offset += 4;
  468. }
  469. // sample-size-present flag
  470. if (flags & 0x000200) {
  471. offset += 4;
  472. }
  473. // sample-flags-present flag
  474. if (flags & 0x000400) {
  475. offset += 4;
  476. }
  477. // sample-composition-time-offsets-present flag
  478. if (flags & 0x000800) {
  479. offset += 4;
  480. }
  481. }
  482. return duration;
  483. }
  484.  
/**
 * Rewrite each track fragment's tfdt baseMediaDecodeTime in place,
 * subtracting `timeOffset` (in seconds, scaled to the track's timescale).
 */
export function offsetStartDTS(
  initData: InitData,
  fmp4: Uint8Array,
  timeOffset: number
) {
  findBox(fmp4, ['moof', 'traf']).forEach((traf) => {
    findBox(traf, ['tfhd']).forEach((tfhd) => {
      // get the track id from the tfhd
      const id = readUint32(tfhd, 4);
      const track = initData[id];
      if (!track) {
        return;
      }
      // assume a 90kHz clock if no timescale was specified
      const timescale = track.timescale || 90e3;
      // get the base media decode time from the tfdt
      findBox(traf, ['tfdt']).forEach((tfdt) => {
        const version = tfdt[0];
        let baseMediaDecodeTime = readUint32(tfdt, 4);
        if (version === 0) {
          // NOTE(review): unlike the version-1 branch below, this result is
          // not clamped at 0 — a timeOffset larger than the base time would
          // underflow. Confirm callers guarantee offset <= base time here.
          writeUint32(tfdt, 4, baseMediaDecodeTime - timeOffset * timescale);
        } else {
          // 64-bit time: combine both 32-bit words, offset, clamp at zero,
          // then split back into upper/lower words.
          baseMediaDecodeTime *= Math.pow(2, 32);
          baseMediaDecodeTime += readUint32(tfdt, 8);
          baseMediaDecodeTime -= timeOffset * timescale;
          baseMediaDecodeTime = Math.max(baseMediaDecodeTime, 0);
          const upper = Math.floor(baseMediaDecodeTime / (UINT32_MAX + 1));
          const lower = Math.floor(baseMediaDecodeTime % (UINT32_MAX + 1));
          writeUint32(tfdt, 4, upper);
          writeUint32(tfdt, 8, lower);
        }
      });
    });
  });
}
  520.  
  521. // TODO: Check if the last moof+mdat pair is part of the valid range
  522. export function segmentValidRange(data: Uint8Array): SegmentedRange {
  523. const segmentedRange: SegmentedRange = {
  524. valid: null,
  525. remainder: null,
  526. };
  527.  
  528. const moofs = findBox(data, ['moof']);
  529. if (!moofs) {
  530. return segmentedRange;
  531. } else if (moofs.length < 2) {
  532. segmentedRange.remainder = data;
  533. return segmentedRange;
  534. }
  535. const last = moofs[moofs.length - 1];
  536. // Offset by 8 bytes; findBox offsets the start by as much
  537. segmentedRange.valid = sliceUint8(data, 0, last.byteOffset - 8);
  538. segmentedRange.remainder = sliceUint8(data, last.byteOffset - 8);
  539. return segmentedRange;
  540. }
  541.  
// Result of segmentValidRange(): `valid` holds complete moof+mdat data that
// can be flushed; `remainder` holds the trailing bytes from the last moof on.
export interface SegmentedRange {
  valid: Uint8Array | null;
  remainder: Uint8Array | null;
}
  546.  
  547. export function appendUint8Array(
  548. data1: Uint8Array,
  549. data2: Uint8Array
  550. ): Uint8Array {
  551. const temp = new Uint8Array(data1.length + data2.length);
  552. temp.set(data1);
  553. temp.set(data2, data1.length);
  554.  
  555. return temp;
  556. }
  557.  
// Fields parsed from an emsg (DASH event message) box by parseEmsg().
export interface IEmsgParsingData {
  schemeIdUri: string;
  value: string;
  timeScale: number;
  presentationTimeDelta?: number; // meaningful for version 0 boxes
  presentationTime?: number; // meaningful for version 1 boxes
  eventDuration: number;
  id: number;
  payload: Uint8Array; // message data following the header fields
}
  568.  
/**
 * Walk the moof/traf/trun boxes of a passthrough video track and extract
 * SEI user-data samples (e.g. CEA-608/708 captions), with presentation
 * timestamps derived from tfdt base times, per-sample durations and
 * composition offsets.
 */
export function parseSamples(
  timeOffset: number,
  track: PassthroughTrack
): UserdataSample[] {
  const seiSamples = [] as UserdataSample[];
  const videoData = track.samples;
  const timescale = track.timescale;
  const trackId = track.id;
  let isHEVCFlavor = false;

  const moofs = findBox(videoData, ['moof']);
  moofs.map((moof) => {
    // findBox payloads start 8 bytes past the box header; recover the
    // absolute offset of the moof box itself within videoData
    const moofOffset = moof.byteOffset - 8;
    const trafs = findBox(moof, ['traf']);
    trafs.map((traf) => {
      // get the base media decode time from the tfdt
      const baseTime = findBox(traf, ['tfdt']).map((tfdt) => {
        const version = tfdt[0];
        let result = readUint32(tfdt, 4);
        if (version === 1) {
          // 64-bit time: high word scaled by 2^32 plus low word
          result *= Math.pow(2, 32);
          result += readUint32(tfdt, 8);
        }
        return result / timescale;
      })[0];

      if (baseTime !== undefined) {
        timeOffset = baseTime;
      }

      return findBox(traf, ['tfhd']).map((tfhd) => {
        const id = readUint32(tfhd, 4);
        // low 24 bits of the full-box header carry the tf_flags
        const tfhdFlags = readUint32(tfhd, 0) & 0xffffff;
        const baseDataOffsetPresent = (tfhdFlags & 0x000001) !== 0;
        const sampleDescriptionIndexPresent = (tfhdFlags & 0x000002) !== 0;
        const defaultSampleDurationPresent = (tfhdFlags & 0x000008) !== 0;
        let defaultSampleDuration = 0;
        const defaultSampleSizePresent = (tfhdFlags & 0x000010) !== 0;
        let defaultSampleSize = 0;
        const defaultSampleFlagsPresent = (tfhdFlags & 0x000020) !== 0;
        let tfhdOffset = 8;

        if (id === trackId) {
          // skip optional tfhd fields to reach the defaults we need
          if (baseDataOffsetPresent) {
            tfhdOffset += 8;
          }
          if (sampleDescriptionIndexPresent) {
            tfhdOffset += 4;
          }
          if (defaultSampleDurationPresent) {
            defaultSampleDuration = readUint32(tfhd, tfhdOffset);
            tfhdOffset += 4;
          }
          if (defaultSampleSizePresent) {
            defaultSampleSize = readUint32(tfhd, tfhdOffset);
            tfhdOffset += 4;
          }
          if (defaultSampleFlagsPresent) {
            tfhdOffset += 4;
          }
          if (track.type === 'video') {
            // HEVC uses different SEI NAL unit types than AVC
            isHEVCFlavor = isHEVC(track.codec);
          }

          findBox(traf, ['trun']).map((trun) => {
            const version = trun[0];
            const flags = readUint32(trun, 0) & 0xffffff;
            const dataOffsetPresent = (flags & 0x000001) !== 0;
            let dataOffset = 0;
            const firstSampleFlagsPresent = (flags & 0x000004) !== 0;
            const sampleDurationPresent = (flags & 0x000100) !== 0;
            let sampleDuration = 0;
            const sampleSizePresent = (flags & 0x000200) !== 0;
            let sampleSize = 0;
            const sampleFlagsPresent = (flags & 0x000400) !== 0;
            const sampleCompositionOffsetsPresent = (flags & 0x000800) !== 0;
            let compositionOffset = 0;
            const sampleCount = readUint32(trun, 4);
            let trunOffset = 8; // past version, flags, and sample count

            if (dataOffsetPresent) {
              dataOffset = readUint32(trun, trunOffset);
              trunOffset += 4;
            }
            if (firstSampleFlagsPresent) {
              trunOffset += 4;
            }

            // absolute position of the first sample's data within videoData
            let sampleOffset = dataOffset + moofOffset;

            for (let ix = 0; ix < sampleCount; ix++) {
              if (sampleDurationPresent) {
                sampleDuration = readUint32(trun, trunOffset);
                trunOffset += 4;
              } else {
                sampleDuration = defaultSampleDuration;
              }
              if (sampleSizePresent) {
                sampleSize = readUint32(trun, trunOffset);
                trunOffset += 4;
              } else {
                sampleSize = defaultSampleSize;
              }
              if (sampleFlagsPresent) {
                trunOffset += 4;
              }
              if (sampleCompositionOffsetsPresent) {
                // version 0 stores the composition offset unsigned, version 1 signed
                if (version === 0) {
                  compositionOffset = readUint32(trun, trunOffset);
                } else {
                  compositionOffset = readSint32(trun, trunOffset);
                }
                trunOffset += 4;
              }
              if (track.type === ElementaryStreamTypes.VIDEO) {
                // walk the length-prefixed NAL units within the sample
                let naluTotalSize = 0;
                while (naluTotalSize < sampleSize) {
                  const naluSize = readUint32(videoData, sampleOffset);
                  sampleOffset += 4;
                  // NOTE(review): this applies the 5-bit AVC type mask even
                  // when isHEVCFlavor is true, so the resulting value can
                  // never equal the HEVC SEI types 39/40 that isSEIMessage
                  // checks for — verify HEVC SEI detection actually works.
                  const naluType = videoData[sampleOffset] & 0x1f;
                  if (isSEIMessage(isHEVCFlavor, naluType)) {
                    const data = videoData.subarray(
                      sampleOffset,
                      sampleOffset + naluSize
                    );
                    parseSEIMessageFromNALu(
                      data,
                      timeOffset + compositionOffset / timescale,
                      seiSamples
                    );
                  }
                  sampleOffset += naluSize;
                  naluTotalSize += naluSize + 4;
                }
              }

              // advance the running presentation time by this sample's duration
              timeOffset += sampleDuration / timescale;
            }
          });
        }
      });
    });
  });
  return seiSamples;
}
  714.  
  715. function isHEVC(codec: string) {
  716. if (!codec) {
  717. return false;
  718. }
  719. const delimit = codec.indexOf('.');
  720. const baseCodec = delimit < 0 ? codec : codec.substring(0, delimit);
  721. return (
  722. baseCodec === 'hvc1' ||
  723. baseCodec === 'hev1' ||
  724. // Dolby Vision
  725. baseCodec === 'dvh1' ||
  726. baseCodec === 'dvhe'
  727. );
  728. }
  729.  
  730. function isSEIMessage(isHEVCFlavor: boolean, naluType: number) {
  731. return isHEVCFlavor ? naluType === 39 || naluType === 40 : naluType === 6;
  732. }
  733.  
/**
 * Parse SEI messages out of a NAL unit and push recognized user-data
 * payloads — CEA-708-wrapped captions (payloadType 4) and unregistered
 * user data (payloadType 5) — onto `samples` with the given timestamp.
 */
export function parseSEIMessageFromNALu(
  unescapedData: Uint8Array,
  pts: number,
  samples: UserdataSample[]
) {
  const data = discardEPB(unescapedData);
  let seiPtr = 0;
  // skip frameType
  seiPtr++;
  let payloadType = 0;
  let payloadSize = 0;
  let endOfCaptions = false;
  let b = 0;

  while (seiPtr < data.length) {
    payloadType = 0;
    // payload type is variable-length coded: 0xff bytes keep accumulating
    do {
      if (seiPtr >= data.length) {
        break;
      }
      b = data[seiPtr++];
      payloadType += b;
    } while (b === 0xff);

    // Parse payload size.
    payloadSize = 0;
    do {
      if (seiPtr >= data.length) {
        break;
      }
      b = data[seiPtr++];
      payloadSize += b;
    } while (b === 0xff);

    const leftOver = data.length - seiPtr;

    // payloadType 4: user_data_registered_itu_t_t35 (caption carriage)
    if (!endOfCaptions && payloadType === 4 && seiPtr < data.length) {
      endOfCaptions = true;

      const countryCode = data[seiPtr++];
      // 181 = United States country code
      if (countryCode === 181) {
        const providerCode = readUint16(data, seiPtr);
        seiPtr += 2;

        // 49 = ATSC provider code
        if (providerCode === 49) {
          const userStructure = readUint32(data, seiPtr);
          seiPtr += 4;

          // 0x47413934 = 'GA94' (ATSC1 user data identifier)
          if (userStructure === 0x47413934) {
            const userDataType = data[seiPtr++];

            // Raw CEA-608 bytes wrapped in CEA-708 packet
            if (userDataType === 3) {
              const firstByte = data[seiPtr++];
              const totalCCs = 0x1f & firstByte;
              const enabled = 0x40 & firstByte;
              // header byte + marker + 3 bytes per caption channel packet
              const totalBytes = enabled ? 2 + totalCCs * 3 : 0;
              const byteArray = new Uint8Array(totalBytes);
              if (enabled) {
                byteArray[0] = firstByte;
                for (let i = 1; i < totalBytes; i++) {
                  byteArray[i] = data[seiPtr++];
                }
              }

              samples.push({
                type: userDataType,
                payloadType,
                pts,
                bytes: byteArray,
              });
            }
          }
        }
      }
    } else if (payloadType === 5 && payloadSize < leftOver) {
      // payloadType 5: user_data_unregistered — 16-byte UUID then payload
      endOfCaptions = true;

      if (payloadSize > 16) {
        const uuidStrArray: Array<string> = [];
        for (let i = 0; i < 16; i++) {
          const b = data[seiPtr++].toString(16);
          uuidStrArray.push(b.length == 1 ? '0' + b : b);

          // hyphenate into the canonical 8-4-4-4-12 UUID format
          if (i === 3 || i === 5 || i === 7 || i === 9) {
            uuidStrArray.push('-');
          }
        }
        const length = payloadSize - 16;
        const userDataBytes = new Uint8Array(length);
        for (let i = 0; i < length; i++) {
          userDataBytes[i] = data[seiPtr++];
        }

        samples.push({
          payloadType,
          pts,
          uuid: uuidStrArray.join(''),
          userData: utf8ArrayToStr(userDataBytes),
          userDataBytes,
        });
      }
    } else if (payloadSize < leftOver) {
      // unhandled payload type: skip over its bytes
      seiPtr += payloadSize;
    } else if (payloadSize > leftOver) {
      // payload claims more bytes than remain: stop parsing
      break;
    }
    // NOTE(review): when payloadSize === leftOver and none of the branches
    // above consumed the payload, seiPtr is not advanced and the loop
    // re-reads the remaining bytes as a new type/size header — confirm
    // this fall-through is intended.
  }
}
  843.  
  844. /**
  845. * remove Emulation Prevention bytes from a RBSP
  846. */
  847. function discardEPB(data: Uint8Array): Uint8Array {
  848. const length = data.byteLength;
  849. const EPBPositions = [] as Array<number>;
  850. let i = 1;
  851.  
  852. // Find all `Emulation Prevention Bytes`
  853. while (i < length - 2) {
  854. if (data[i] === 0 && data[i + 1] === 0 && data[i + 2] === 0x03) {
  855. EPBPositions.push(i + 2);
  856. i += 2;
  857. } else {
  858. i++;
  859. }
  860. }
  861.  
  862. // If no Emulation Prevention Bytes were found just return the original
  863. // array
  864. if (EPBPositions.length === 0) {
  865. return data;
  866. }
  867.  
  868. // Create a new array to hold the NAL unit data
  869. const newLength = length - EPBPositions.length;
  870. const newData = new Uint8Array(newLength);
  871. let sourceIndex = 0;
  872.  
  873. for (i = 0; i < newLength; sourceIndex++, i++) {
  874. if (sourceIndex === EPBPositions[0]) {
  875. // Skip this byte
  876. sourceIndex++;
  877. // Remove this position index
  878. EPBPositions.shift();
  879. }
  880. newData[i] = data[sourceIndex];
  881. }
  882. return newData;
  883. }
  884.  
/**
 * Parse an emsg (DASH event message) box payload into its fields.
 * Handles version 0 and version 1 layouts; any bytes after the parsed
 * header are returned untouched as `payload`.
 */
export function parseEmsg(data: Uint8Array): IEmsgParsingData {
  const version = data[0];
  let schemeIdUri: string = '';
  let value: string = '';
  let timeScale: number = 0;
  let presentationTimeDelta: number = 0;
  let presentationTime: number = 0;
  let eventDuration: number = 0;
  let id: number = 0;
  let offset: number = 0;

  if (version === 0) {
    // NOTE(review): scanning starts at offset 0, which is the version byte
    // (0 in this branch), so the first loop terminates immediately and the
    // numeric fields rely on the fixed offsets 12/16/20/24 below — verify
    // this against the emsg v0 layout in ISO/IEC 23009-1.
    while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
      schemeIdUri += bin2str(data.subarray(offset, offset + 1));
      offset += 1;
    }

    // include the NUL terminator in the accumulated string
    schemeIdUri += bin2str(data.subarray(offset, offset + 1));
    offset += 1;

    while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
      value += bin2str(data.subarray(offset, offset + 1));
      offset += 1;
    }

    value += bin2str(data.subarray(offset, offset + 1));
    offset += 1;

    timeScale = readUint32(data, 12);
    presentationTimeDelta = readUint32(data, 16);
    eventDuration = readUint32(data, 20);
    id = readUint32(data, 24);
    offset = 28;
  } else if (version === 1) {
    // skip version byte + 24-bit flags
    offset += 4;
    timeScale = readUint32(data, offset);
    offset += 4;
    // 64-bit presentation_time read as two 32-bit words
    const leftPresentationTime = readUint32(data, offset);
    offset += 4;
    const rightPresentationTime = readUint32(data, offset);
    offset += 4;
    presentationTime = 2 ** 32 * leftPresentationTime + rightPresentationTime;
    if (!Number.isSafeInteger(presentationTime)) {
      presentationTime = Number.MAX_SAFE_INTEGER;
      // eslint-disable-next-line no-console
      console.warn(
        'Presentation time exceeds safe integer limit and wrapped to max safe integer in parsing emsg box'
      );
    }

    eventDuration = readUint32(data, offset);
    offset += 4;
    id = readUint32(data, offset);
    offset += 4;

    // NUL-terminated scheme_id_uri (terminator included in the result)
    while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
      schemeIdUri += bin2str(data.subarray(offset, offset + 1));
      offset += 1;
    }

    schemeIdUri += bin2str(data.subarray(offset, offset + 1));
    offset += 1;

    // NUL-terminated value string (terminator included in the result)
    while (bin2str(data.subarray(offset, offset + 1)) !== '\0') {
      value += bin2str(data.subarray(offset, offset + 1));
      offset += 1;
    }

    value += bin2str(data.subarray(offset, offset + 1));
    offset += 1;
  }
  // everything after the parsed header is the message payload
  const payload = data.subarray(offset, data.byteLength);

  return {
    schemeIdUri,
    value,
    timeScale,
    presentationTime,
    presentationTimeDelta,
    eventDuration,
    id,
    payload,
  };
}