1
0
mirror of https://github.com/matrix-org/matrix-js-sdk.git synced 2025-08-07 23:02:56 +03:00

Fix screenshare failing after several attempts (#2771)

* Fix screenshare failing after several attempts

Re-use any existing transceivers when screen sharing. This prevents
transceivers accumulating and making the SDP too big: see linked bug.

This also switches from `addTrack()` to `addTransceiver()` which is
not that large of a change, other than having to explicitly find the
transceivers after an offer has arrived rather than just adding tracks
and letting WebRTC take care of it.

Fixes https://github.com/vector-im/element-call/issues/625

* Fix tests

* Unused import

* Use a map instead of an array

* Add comment

* more comment

* Remove commented code

* Remove unintentional debugging

* Add test for screenshare transceiver re-use

* Type alias for transceiver map
This commit is contained in:
David Baker
2022-10-19 16:00:54 +01:00
committed by GitHub
parent dfe535bc07
commit c57c8978cf
4 changed files with 235 additions and 108 deletions

View File

@@ -104,12 +104,12 @@ export class MockRTCPeerConnection {
private negotiationNeededListener: () => void;
public iceCandidateListener?: (e: RTCPeerConnectionIceEvent) => void;
public onTrackListener?: (e: RTCTrackEvent) => void;
private needsNegotiation = false;
public needsNegotiation = false;
public readyToNegotiate: Promise<void>;
private onReadyToNegotiate: () => void;
localDescription: RTCSessionDescription;
signalingState: RTCSignalingState = "stable";
public senders: MockRTCRtpSender[] = [];
public transceivers: MockRTCRtpTransceiver[] = [];
public static triggerAllNegotiations(): void {
for (const inst of this.instances) {
@@ -169,12 +169,23 @@ export class MockRTCPeerConnection {
}
close() { }
getStats() { return []; }
addTrack(track: MockMediaStreamTrack): MockRTCRtpSender {
addTransceiver(track: MockMediaStreamTrack): MockRTCRtpTransceiver {
this.needsNegotiation = true;
this.onReadyToNegotiate();
const newSender = new MockRTCRtpSender(track);
this.senders.push(newSender);
return newSender;
const newReceiver = new MockRTCRtpReceiver(track);
const newTransceiver = new MockRTCRtpTransceiver(this);
newTransceiver.sender = newSender as unknown as RTCRtpSender;
newTransceiver.receiver = newReceiver as unknown as RTCRtpReceiver;
this.transceivers.push(newTransceiver);
return newTransceiver;
}
addTrack(track: MockMediaStreamTrack): MockRTCRtpSender {
return this.addTransceiver(track).sender as unknown as MockRTCRtpSender;
}
removeTrack() {
@@ -182,9 +193,8 @@ export class MockRTCPeerConnection {
this.onReadyToNegotiate();
}
getSenders(): MockRTCRtpSender[] { return this.senders; }
getTransceivers = jest.fn().mockReturnValue([]);
getTransceivers(): MockRTCRtpTransceiver[] { return this.transceivers; }
getSenders(): MockRTCRtpSender[] { return this.transceivers.map(t => t.sender as unknown as MockRTCRtpSender); }
doNegotiation() {
if (this.needsNegotiation && this.negotiationNeededListener) {
@@ -198,7 +208,23 @@ export class MockRTCRtpSender {
constructor(public track: MockMediaStreamTrack) { }
replaceTrack(track: MockMediaStreamTrack) { this.track = track; }
setCodecPreferences(prefs: RTCRtpCodecCapability[]): void {}
}
/**
 * Minimal stand-in for an RTCRtpReceiver used by the WebRTC test mocks.
 * It only records the track it was constructed with.
 */
export class MockRTCRtpReceiver {
    public track: MockMediaStreamTrack;

    constructor(track: MockMediaStreamTrack) {
        this.track = track;
    }
}
/**
 * Mock of an RTCRtpTransceiver belonging to a MockRTCPeerConnection.
 *
 * `sender` and `receiver` are assigned by whoever creates the transceiver.
 * Assigning `direction` marks the owning peer connection as needing
 * negotiation; reading it returns the last value assigned.
 */
export class MockRTCRtpTransceiver {
    constructor(private peerConn: MockRTCPeerConnection) {}

    public sender: RTCRtpSender;
    public receiver: RTCRtpReceiver;

    // Last direction written through the setter. Left unset until the first
    // assignment, so a read before any write still yields undefined.
    private directionValue: string;

    /**
     * The direction most recently assigned to this transceiver.
     * Previously this was a write-only accessor, so code that read the
     * direction back always saw `undefined`.
     */
    public get direction(): string {
        return this.directionValue;
    }

    public set direction(newDirection: string) {
        this.directionValue = newDirection;
        // changing direction requires a renegotiation on the owning connection
        this.peerConn.needsNegotiation = true;
    }

    setCodecPreferences = jest.fn<void, RTCRtpCodecCapability[]>();
}
export class MockMediaStreamTrack {

View File

@@ -41,7 +41,6 @@ import {
installWebRTCMocks,
MockRTCPeerConnection,
SCREENSHARE_STREAM_ID,
MockRTCRtpSender,
} from "../../test-utils/webrtc";
import { CallFeed } from "../../../src/webrtc/callFeed";
import { EventType, IContent, ISendEventResponse, MatrixEvent, Room } from "../../../src";
@@ -370,17 +369,15 @@ describe('Call', function() {
).typed(),
);
const usermediaSenders: Array<RTCRtpSender> = (call as any).usermediaSenders;
// XXX: Lots of inspecting the private state of the call object here
const transceivers: Map<string, RTCRtpTransceiver> = (call as any).transceivers;
expect(call.localUsermediaStream.id).toBe("stream");
expect(call.localUsermediaStream.getAudioTracks()[0].id).toBe("new_audio_track");
expect(call.localUsermediaStream.getVideoTracks()[0].id).toBe("video_track");
expect(usermediaSenders.find((sender) => {
return sender?.track?.kind === "audio";
}).track.id).toBe("new_audio_track");
expect(usermediaSenders.find((sender) => {
return sender?.track?.kind === "video";
}).track.id).toBe("video_track");
// call has a function for generating these but we hardcode here to avoid exporting it
expect(transceivers.get("m.usermedia:audio").sender.track.id).toBe("new_audio_track");
expect(transceivers.get("m.usermedia:video").sender.track.id).toBe("video_track");
});
it("should handle upgrade to video call", async () => {
@@ -400,16 +397,13 @@ describe('Call', function() {
// setLocalVideoMuted probably?
await (call as any).upgradeCall(false, true);
const usermediaSenders: Array<RTCRtpSender> = (call as any).usermediaSenders;
// XXX: More inspecting private state of the call object
const transceivers: Map<string, RTCRtpTransceiver> = (call as any).transceivers;
expect(call.localUsermediaStream.getAudioTracks()[0].id).toBe("usermedia_audio_track");
expect(call.localUsermediaStream.getVideoTracks()[0].id).toBe("usermedia_video_track");
expect(usermediaSenders.find((sender) => {
return sender?.track?.kind === "audio";
}).track.id).toBe("usermedia_audio_track");
expect(usermediaSenders.find((sender) => {
return sender?.track?.kind === "video";
}).track.id).toBe("usermedia_video_track");
expect(transceivers.get("m.usermedia:audio").sender.track.id).toBe("usermedia_audio_track");
expect(transceivers.get("m.usermedia:video").sender.track.id).toBe("usermedia_video_track");
});
it("should handle SDPStreamMetadata changes", async () => {
@@ -479,6 +473,23 @@ describe('Call', function() {
});
describe("should deduce the call type correctly", () => {
beforeEach(async () => {
// start an incoming call, but add no feeds
await call.initWithInvite({
getContent: jest.fn().mockReturnValue({
version: "1",
call_id: "call_id",
party_id: "remote_party_id",
lifetime: CALL_LIFETIME,
offer: {
sdp: DUMMY_SDP,
},
}),
getSender: () => "@test:foo",
getLocalAge: () => 1,
} as unknown as MatrixEvent);
});
it("if no video", async () => {
call.getOpponentMember = jest.fn().mockReturnValue({ userId: "@bob:bar.uk" });
@@ -1057,9 +1068,24 @@ describe('Call', function() {
});
describe("Screen sharing", () => {
const waitNegotiateFunc = resolve => {
mockSendEvent.mockImplementationOnce(() => {
// Note that the peer connection here is a dummy one and always returns
// dummy SDP, so there's not much point returning the content: the SDP will
// always be the same.
resolve();
return Promise.resolve({ event_id: "foo" });
});
};
beforeEach(async () => {
await startVoiceCall(client, call);
const sendNegotiatePromise = new Promise<void>(waitNegotiateFunc);
MockRTCPeerConnection.triggerAllNegotiations();
await sendNegotiatePromise;
await call.onAnswerReceived(makeMockEvent("@test:foo", {
"version": 1,
"call_id": call.callId,
@@ -1090,12 +1116,7 @@ describe('Call', function() {
).toHaveLength(1);
mockSendEvent.mockReset();
const sendNegotiatePromise = new Promise<void>(resolve => {
mockSendEvent.mockImplementationOnce(() => {
resolve();
return Promise.resolve({ event_id: "foo" });
});
});
const sendNegotiatePromise = new Promise<void>(waitNegotiateFunc);
MockRTCPeerConnection.triggerAllNegotiations();
await sendNegotiatePromise;
@@ -1130,29 +1151,52 @@ describe('Call', function() {
headerExtensions: [],
});
const prom = new Promise<void>(resolve => {
const mockPeerConn = call.peerConn as unknown as MockRTCPeerConnection;
mockPeerConn.addTrack = jest.fn().mockImplementation((track: MockMediaStreamTrack) => {
const mockSender = new MockRTCRtpSender(track);
mockPeerConn.getTransceivers.mockReturnValue([{
sender: mockSender,
setCodecPreferences: (prefs: RTCRtpCodecCapability[]) => {
expect(prefs).toEqual([
expect.objectContaining({ mimeType: "video/somethingelse" }),
]);
resolve();
},
}]);
return mockSender;
});
});
mockSendEvent.mockReset();
const sendNegotiatePromise = new Promise<void>(waitNegotiateFunc);
await call.setScreensharingEnabled(true);
MockRTCPeerConnection.triggerAllNegotiations();
await prom;
await sendNegotiatePromise;
const mockPeerConn = call.peerConn as unknown as MockRTCPeerConnection;
expect(
mockPeerConn.transceivers[mockPeerConn.transceivers.length - 1].setCodecPreferences,
).toHaveBeenCalledWith([expect.objectContaining({ mimeType: "video/somethingelse" })]);
});
it("re-uses transceiver when screen sharing is re-enabled", async () => {
const mockPeerConn = call.peerConn as unknown as MockRTCPeerConnection;
// sanity check: we should start with one transceiver (user media audio)
expect(mockPeerConn.transceivers.length).toEqual(1);
const screenshareOnProm1 = new Promise<void>(waitNegotiateFunc);
await call.setScreensharingEnabled(true);
MockRTCPeerConnection.triggerAllNegotiations();
await screenshareOnProm1;
// we should now have another transceiver for the screenshare
expect(mockPeerConn.transceivers.length).toEqual(2);
const screenshareOffProm = new Promise<void>(waitNegotiateFunc);
await call.setScreensharingEnabled(false);
MockRTCPeerConnection.triggerAllNegotiations();
await screenshareOffProm;
// both transceivers should still be there
expect(mockPeerConn.transceivers.length).toEqual(2);
const screenshareOnProm2 = new Promise<void>(waitNegotiateFunc);
await call.setScreensharingEnabled(true);
MockRTCPeerConnection.triggerAllNegotiations();
await screenshareOnProm2;
// should still be two, ie. another one should not have been created
// when re-enabling the screen share.
expect(mockPeerConn.transceivers.length).toEqual(2);
});
});

View File

@@ -308,6 +308,16 @@ export type CallEventHandlerMap = {
[CallEvent.SendVoipEvent]: (event: Record<string, any>) => void;
};
// The key of the transceiver map (purpose + media type, separated by ':')
type TransceiverKey = string;

/**
 * Generates keys for the map of transceivers.
 *
 * @param purpose the purpose of the stream the track belongs to
 * @param kind the media kind of the track. This is unfortunately a string
 *     rather than MediaType as this is the type of track.kind
 * @returns the map key: purpose and kind joined by ':'
 */
function getTransceiverKey(purpose: SDPStreamMetadataPurpose, kind: string): TransceiverKey {
    return purpose + ':' + kind;
}
/**
* Construct a new Matrix Call.
* @constructor
@@ -345,8 +355,10 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
private candidateSendTries = 0;
private candidatesEnded = false;
private feeds: Array<CallFeed> = [];
private usermediaSenders: Array<RTCRtpSender> = [];
private screensharingSenders: Array<RTCRtpSender> = [];
// our transceivers for each purpose and type of media
private transceivers = new Map<TransceiverKey, RTCRtpTransceiver>();
private inviteOrAnswerSent = false;
private waitForLocalAVStream: boolean;
private successor: MatrixCall;
@@ -634,6 +646,18 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
audioMuted,
videoMuted,
}));
// gather transceivers from the new tracks so that we can use the same ones for tracks that
// we add later. We only do this for user media streams though: screenshare streams just always
// get their own unidirectional transceiver since a bidirectional screen share is pretty rare
// (we *could* re-use an existing recvonly transceiver for this, but it's simpler to just not).
if (purpose == SDPStreamMetadataPurpose.Usermedia) {
for (const track of stream.getTracks()) {
const transceiver = this.peerConn.getTransceivers().find(t => t.receiver.track == track);
this.transceivers.set(getTransceiverKey(purpose, track.kind), transceiver);
}
}
this.emit(CallEvent.FeedsChanged, this.feeds);
logger.info(
@@ -675,6 +699,12 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
stream,
purpose,
}));
for (const track of stream.getTracks()) {
const transceiver = this.peerConn.getTransceivers().find(t => t.receiver.track == track);
this.transceivers.set(getTransceiverKey(purpose, track.kind), transceiver);
}
this.emit(CallEvent.FeedsChanged, this.feeds);
logger.info(`Call ${this.callId} pushed remote stream (id="${stream.id}", active="${stream.active}")`);
@@ -722,11 +752,6 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
this.feeds.push(callFeed);
if (addToPeerConnection) {
const senderArray = callFeed.purpose === SDPStreamMetadataPurpose.Usermedia ?
this.usermediaSenders : this.screensharingSenders;
// Empty the array
senderArray.splice(0, senderArray.length);
for (const track of callFeed.stream.getTracks()) {
logger.info(
`Call ${this.callId} ` +
@@ -738,7 +763,27 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
`enabled=${track.enabled}` +
`) to peer connection`,
);
senderArray.push(this.peerConn.addTrack(track, callFeed.stream));
const tKey = getTransceiverKey(callFeed.purpose, track.kind);
if (this.transceivers.has(tKey)) {
// we already have a sender, so we re-use it. We try to re-use transceivers as much
// as possible because they can't be removed once added, so otherwise they just
// accumulate which makes the SDP very large very quickly: in fact it only takes
// about 6 video tracks to exceed the maximum size of an Olm-encrypted
// Matrix event.
const transceiver = this.transceivers.get(tKey);
transceiver.sender.replaceTrack(track);
// set the direction to indicate we're going to start sending again
// (this will trigger the re-negotiation)
transceiver.direction = transceiver.direction === "inactive" ? "sendonly" : "sendrecv";
} else {
// create a new one: pass the track in and everything happens automatically
this.transceivers.set(tKey, this.peerConn.addTransceiver(track, {
streams: [callFeed.stream],
direction: callFeed.purpose === SDPStreamMetadataPurpose.Usermedia ? "sendrecv" : "sendonly",
}));
}
}
}
@@ -759,20 +804,23 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
* @param callFeed to remove
*/
public removeLocalFeed(callFeed: CallFeed): void {
const senderArray = callFeed.purpose === SDPStreamMetadataPurpose.Usermedia
? this.usermediaSenders
: this.screensharingSenders;
const audioTransceiverKey = getTransceiverKey(callFeed.purpose, "audio");
const videoTransceiverKey = getTransceiverKey(callFeed.purpose, "video");
for (const sender of senderArray) {
this.peerConn.removeTrack(sender);
for (const transceiverKey of [audioTransceiverKey, videoTransceiverKey]) {
// this is slightly mixing the track and transceiver API but is basically just shorthand.
// There is no way to actually remove a transceiver, so this just sets it to inactive
// (or recvonly) and replaces the source with nothing.
if (this.transceivers.has(transceiverKey)) {
const transceiver = this.transceivers.get(transceiverKey);
if (transceiver.sender) this.peerConn.removeTrack(transceiver.sender);
}
}
if (callFeed.purpose === SDPStreamMetadataPurpose.Screenshare) {
this.client.getMediaHandler().stopScreensharingStream(callFeed.stream);
}
// Empty the array
senderArray.splice(0, senderArray.length);
this.deleteFeed(callFeed);
}
@@ -1139,9 +1187,19 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
return false;
}
} else {
for (const sender of this.screensharingSenders) {
this.peerConn.removeTrack(sender);
const audioTransceiver = this.transceivers.get(getTransceiverKey(
SDPStreamMetadataPurpose.Screenshare, "audio",
));
const videoTransceiver = this.transceivers.get(getTransceiverKey(
SDPStreamMetadataPurpose.Screenshare, "video",
));
for (const transceiver of [audioTransceiver, videoTransceiver]) {
// this is slightly mixing the track and transceiver API but is basically just shorthand
// for removing the sender.
if (transceiver && transceiver.sender) this.peerConn.removeTrack(transceiver.sender);
}
this.client.getMediaHandler().stopScreensharingStream(this.localScreensharingStream);
this.deleteFeedByStream(this.localScreensharingStream);
return false;
@@ -1167,9 +1225,11 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
const track = stream.getTracks().find((track) => {
return track.kind === "video";
});
const sender = this.usermediaSenders.find((sender) => {
return sender.track?.kind === "video";
});
const sender = this.transceivers.get(getTransceiverKey(
SDPStreamMetadataPurpose.Usermedia, "video",
)).sender;
sender.replaceTrack(track);
this.pushNewLocalFeed(stream, SDPStreamMetadataPurpose.Screenshare, false);
@@ -1183,9 +1243,9 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
const track = this.localUsermediaStream.getTracks().find((track) => {
return track.kind === "video";
});
const sender = this.usermediaSenders.find((sender) => {
return sender.track?.kind === "video";
});
const sender = this.transceivers.get(getTransceiverKey(
SDPStreamMetadataPurpose.Usermedia, "video",
)).sender;
sender.replaceTrack(track);
this.client.getMediaHandler().stopScreensharingStream(this.localScreensharingStream);
@@ -1219,15 +1279,12 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
this.localUsermediaStream.addTrack(track);
}
const newSenders = [];
for (const track of stream.getTracks()) {
const oldSender = this.usermediaSenders.find((sender) => {
return sender.track?.kind === track.kind;
});
let newSender: RTCRtpSender;
const tKey = getTransceiverKey(SDPStreamMetadataPurpose.Usermedia, track.kind);
const oldSender = this.transceivers.get(tKey)?.sender;
let added = false;
if (oldSender) {
try {
logger.info(
`Call ${this.callId} `+
@@ -1239,8 +1296,13 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
`) to peer connection`,
);
await oldSender.replaceTrack(track);
newSender = oldSender;
added = true;
} catch (error) {
logger.warn(`replaceTrack failed: adding new transceiver instead`, error);
}
}
if (!added) {
logger.info(
`Call ${this.callId} `+
`Adding track (` +
@@ -1250,13 +1312,13 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
`streamPurpose="${callFeed.purpose}"` +
`) to peer connection`,
);
newSender = this.peerConn.addTrack(track, this.localUsermediaStream);
}
newSenders.push(newSender);
this.transceivers.set(tKey, this.peerConn.addTransceiver(track, {
streams: [this.localUsermediaStream],
direction: "sendrecv",
}));
}
}
this.usermediaSenders = newSenders;
}
/**
@@ -2109,17 +2171,10 @@ export class MatrixCall extends TypedEventEmitter<CallEvent, CallEventHandlerMap
}
}
for (const trans of this.peerConn.getTransceivers()) {
if (
this.screensharingSenders.includes(trans.sender) &&
(
trans.sender.track?.kind === "video" ||
trans.receiver.track?.kind === "video"
)
) {
trans.setCodecPreferences(codecs);
}
}
const screenshareVideoTransceiver = this.transceivers.get(getTransceiverKey(
SDPStreamMetadataPurpose.Screenshare, "video",
));
if (screenshareVideoTransceiver) screenshareVideoTransceiver.setCodecPreferences(codecs);
}
private onNegotiationNeeded = async (): Promise<void> => {

View File

@@ -607,7 +607,9 @@ export class GroupCall extends TypedEventEmitter<
return false;
}
} else {
await Promise.all(this.calls.map(call => call.removeLocalFeed(call.localScreensharingFeed)));
await Promise.all(this.calls.map(call => {
if (call.localScreensharingFeed) call.removeLocalFeed(call.localScreensharingFeed);
}));
this.client.getMediaHandler().stopScreensharingStream(this.localScreenshareFeed.stream);
this.removeScreenshareFeed(this.localScreenshareFeed);
this.localScreenshareFeed = undefined;