Skip to content

Collaborative Video & Voice Chat Integration #49

@bchou9

Description

@bchou9

Feature Description

Integrate real-time video and voice communication directly into the canvas interface to enable seamless collaboration without switching to external tools.

Problem Statement

Current collaboration limitations:

  • Users must use separate tools for voice/video (Zoom, Teams, Discord)
  • Context switching reduces productivity
  • Difficult to coordinate drawing and discussion
  • No integrated communication history
  • Can't point at canvas elements during discussion

Proposed Integration

Technology Options

  1. WebRTC (Peer-to-Peer) - Direct browser communication
  2. Agora.io - Managed real-time video/voice service
  3. Twilio Video - Cloud-based video conferencing
  4. Daily.co - Embedded video chat API
  5. Jitsi Meet - Open-source video conferencing

Recommended: Start with WebRTC for P2P calls, optionally add Agora/Daily for larger groups.

1. WebRTC Video/Voice Manager

// frontend/src/services/WebRTCManager.js
/**
 * Manages one RTCPeerConnection per remote peer and relays signaling
 * (offers, answers, ICE candidates) through the supplied socket.
 *
 * Remote media is announced through a small built-in event API:
 * `manager.on('remoteStream', ({ peerId, stream }) => ...)`.
 *
 * Only STUN servers are configured, so peers behind restrictive NATs may not
 * be able to connect.
 */
export class WebRTCManager {
  /**
   * @param {object} socket - Signaling transport; only `emit(event, payload)` is used by this class.
   * @param {*} roomId - Room identifier included in every signaling message.
   * @param {*} userId - Identifier of the local user (stored, not otherwise used here).
   */
  constructor(socket, roomId, userId) {
    this.socket = socket;
    this.roomId = roomId;
    this.userId = userId;
    this.peerConnections = new Map();
    this.localStream = null;
    this.remoteStreams = new Map();
    // ICE candidates received before they can be applied, keyed by peer id.
    this.pendingCandidates = new Map();
    // Event name -> Set of handlers registered through `on`.
    this.listeners = new Map();
    this.configuration = {
      iceServers: [
        { urls: 'stun:stun.l.google.com:19302' },
        { urls: 'stun:stun1.l.google.com:19302' }
      ]
    };
  }

  /**
   * Registers a handler for a manager event (currently only 'remoteStream').
   *
   * @param {string} eventName
   * @param {Function} handler - Called with the event payload.
   * @returns {Function} Call it to remove the handler again.
   */
  on(eventName, handler) {
    let handlers = this.listeners.get(eventName);
    if (!handlers) {
      handlers = new Set();
      this.listeners.set(eventName, handlers);
    }
    handlers.add(handler);
    return () => this.off(eventName, handler);
  }

  /**
   * Removes a handler previously registered with `on`.
   *
   * @param {string} eventName
   * @param {Function} handler
   */
  off(eventName, handler) {
    const handlers = this.listeners.get(eventName);
    if (handlers) {
      handlers.delete(handler);
    }
  }

  /**
   * Invokes every handler registered for `eventName`.
   *
   * @param {string} eventName
   * @param {*} payload
   */
  emit(eventName, payload) {
    const handlers = this.listeners.get(eventName);
    if (!handlers) {
      return;
    }
    // Iterate over a copy so handlers may unsubscribe while being notified.
    for (const handler of [...handlers]) {
      handler(payload);
    }
  }

  /**
   * Captures the local camera and/or microphone.
   *
   * @param {boolean} [video=true] - Request a 640x480 video track.
   * @param {boolean} [audio=true] - Request an audio track with echo
   *   cancellation, noise suppression and auto gain control.
   * @returns {Promise<MediaStream>} The captured stream (also stored on `localStream`).
   * @throws Rethrows whatever `getUserMedia` rejects with.
   */
  async startLocalStream(video = true, audio = true) {
    try {
      this.localStream = await navigator.mediaDevices.getUserMedia({
        video: video ? { width: 640, height: 480 } : false,
        // Honour the `audio` flag; previously audio was always requested.
        audio: audio
          ? {
              echoCancellation: true,
              noiseSuppression: true,
              autoGainControl: true
            }
          : false
      });

      return this.localStream;
    } catch (error) {
      console.error('Error accessing media devices:', error);
      throw error;
    }
  }

  /**
   * Creates and registers a peer connection for `remotePeerId`, wiring up
   * track, ICE and connection-state handlers. Any connection already
   * registered for that peer is closed first.
   *
   * @param {*} remotePeerId
   * @returns {Promise<RTCPeerConnection>}
   */
  async createPeerConnection(remotePeerId) {
    // Close a connection we are about to replace so it does not leak.
    const previous = this.peerConnections.get(remotePeerId);
    if (previous) {
      previous.close();
    }

    const pc = new RTCPeerConnection(this.configuration);

    // Add local stream tracks
    if (this.localStream) {
      this.localStream.getTracks().forEach(track => {
        pc.addTrack(track, this.localStream);
      });
    }

    // Handle remote stream
    pc.ontrack = (event) => {
      const [remoteStream] = event.streams;
      if (!remoteStream) {
        return;
      }
      this.remoteStreams.set(remotePeerId, remoteStream);
      this.emit('remoteStream', { peerId: remotePeerId, stream: remoteStream });
    };

    // Handle ICE candidates
    pc.onicecandidate = (event) => {
      if (event.candidate) {
        this.socket.emit('ice-candidate', {
          roomId: this.roomId,
          targetPeer: remotePeerId,
          candidate: event.candidate
        });
      }
    };

    // Connection state changes
    pc.onconnectionstatechange = () => {
      console.log(`Connection state: ${pc.connectionState}`);
      const hasDropped = pc.connectionState === 'disconnected' || pc.connectionState === 'failed';
      // Ignore events from a connection that has since been replaced;
      // otherwise a stale connection would tear down its successor.
      if (hasDropped && this.peerConnections.get(remotePeerId) === pc) {
        this.removePeerConnection(remotePeerId);
      }
    };

    this.peerConnections.set(remotePeerId, pc);
    return pc;
  }

  /**
   * Opens a connection to `remotePeerId` and sends it an SDP offer.
   *
   * @param {*} remotePeerId
   * @returns {Promise<void>}
   */
  async createOffer(remotePeerId) {
    const pc = await this.createPeerConnection(remotePeerId);
    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);

    this.socket.emit('webrtc-offer', {
      roomId: this.roomId,
      targetPeer: remotePeerId,
      offer: offer
    });
  }

  /**
   * Accepts an SDP offer from `remotePeerId` and replies with an answer.
   *
   * @param {*} remotePeerId
   * @param {object} offer - Session description init received over signaling.
   * @returns {Promise<void>}
   */
  async handleOffer(remotePeerId, offer) {
    const pc = await this.createPeerConnection(remotePeerId);
    await pc.setRemoteDescription(new RTCSessionDescription(offer));
    await this.flushPendingCandidates(remotePeerId, pc);

    const answer = await pc.createAnswer();
    await pc.setLocalDescription(answer);

    this.socket.emit('webrtc-answer', {
      roomId: this.roomId,
      targetPeer: remotePeerId,
      answer: answer
    });
  }

  /**
   * Applies the SDP answer to the connection previously offered to
   * `remotePeerId`. Does nothing when no such connection exists.
   *
   * @param {*} remotePeerId
   * @param {object} answer - Session description init received over signaling.
   * @returns {Promise<void>}
   */
  async handleAnswer(remotePeerId, answer) {
    const pc = this.peerConnections.get(remotePeerId);
    if (pc) {
      await pc.setRemoteDescription(new RTCSessionDescription(answer));
      await this.flushPendingCandidates(remotePeerId, pc);
    }
  }

  /**
   * Applies a remote ICE candidate, or queues it when the connection does not
   * exist yet or has no remote description (adding it then would be rejected).
   *
   * @param {*} remotePeerId
   * @param {object} candidate - ICE candidate init received over signaling.
   * @returns {Promise<void>}
   */
  async handleIceCandidate(remotePeerId, candidate) {
    const pc = this.peerConnections.get(remotePeerId);
    if (!pc || !pc.remoteDescription) {
      const queued = this.pendingCandidates.get(remotePeerId) ?? [];
      queued.push(candidate);
      this.pendingCandidates.set(remotePeerId, queued);
      return;
    }
    await pc.addIceCandidate(new RTCIceCandidate(candidate));
  }

  /**
   * Applies, in arrival order, every candidate queued for `remotePeerId`.
   *
   * @param {*} remotePeerId
   * @param {RTCPeerConnection} pc - Connection that now has a remote description.
   * @returns {Promise<void>}
   */
  async flushPendingCandidates(remotePeerId, pc) {
    const queued = this.pendingCandidates.get(remotePeerId);
    if (!queued) {
      return;
    }
    this.pendingCandidates.delete(remotePeerId);
    for (const candidate of queued) {
      await pc.addIceCandidate(new RTCIceCandidate(candidate));
    }
  }

  /**
   * Enables or disables every local audio track (mute/unmute).
   *
   * @param {boolean} enabled
   */
  toggleAudio(enabled) {
    if (this.localStream) {
      this.localStream.getAudioTracks().forEach(track => {
        track.enabled = enabled;
      });
    }
  }

  /**
   * Enables or disables every local video track.
   *
   * @param {boolean} enabled
   */
  toggleVideo(enabled) {
    if (this.localStream) {
      this.localStream.getVideoTracks().forEach(track => {
        track.enabled = enabled;
      });
    }
  }

  /** Stops all local tracks and forgets the local stream. */
  stopLocalStream() {
    if (this.localStream) {
      this.localStream.getTracks().forEach(track => track.stop());
      this.localStream = null;
    }
  }

  /**
   * Closes and forgets the connection, remote stream and queued candidates
   * for `remotePeerId`.
   *
   * @param {*} remotePeerId
   */
  removePeerConnection(remotePeerId) {
    const pc = this.peerConnections.get(remotePeerId);
    if (pc) {
      pc.close();
      this.peerConnections.delete(remotePeerId);
    }
    this.remoteStreams.delete(remotePeerId);
    this.pendingCandidates.delete(remotePeerId);
  }

  /**
   * Closes every peer connection and releases the local media. Registered
   * event handlers are kept so the manager can be reused for another call.
   */
  disconnect() {
    for (const peerId of [...this.peerConnections.keys()]) {
      this.removePeerConnection(peerId);
    }
    this.pendingCandidates.clear();
    this.stopLocalStream();
  }
}

2. Video Chat UI Component

// frontend/src/components/VideoChat.jsx
export function VideoChat({ roomId, participants }) {
  const [localStream, setLocalStream] = useState(null);
  const [remoteStreams, setRemoteStreams] = useState(new Map());
  const [audioEnabled, setAudioEnabled] = useState(true);
  const [videoEnabled, setVideoEnabled] = useState(true);
  const [expanded, setExpanded] = useState(false);
  const webRTCManager = useRef(null);
  const localVideoRef = useRef(null);

  useEffect(() => {
    const socket = getSocket();
    webRTCManager.current = new WebRTCManager(socket, roomId, getUserId());

    // Setup WebRTC signaling listeners
    socket.on('webrtc-offer', async ({ fromPeer, offer }) => {
      await webRTCManager.current.handleOffer(fromPeer, offer);
    });

    socket.on('webrtc-answer', async ({ fromPeer, answer }) => {
      await webRTCManager.current.handleAnswer(fromPeer, answer);
    });

    socket.on('ice-candidate', async ({ fromPeer, candidate }) => {
      await webRTCManager.current.handleIceCandidate(fromPeer, candidate);
    });

    socket.on('peer-joined', ({ peerId }) => {
      // Initiate call to new peer
      webRTCManager.current.createOffer(peerId);
    });

    return () => {
      webRTCManager.current?.disconnect();
    };
  }, [roomId]);

  const startCall = async () => {
    try {
      const stream = await webRTCManager.current.startLocalStream(videoEnabled, audioEnabled);
      setLocalStream(stream);
      
      if (localVideoRef.current) {
        localVideoRef.current.srcObject = stream;
      }

      // Notify other participants
      participants.forEach(participant => {
        if (participant.id !== getUserId()) {
          webRTCManager.current.createOffer(participant.id);
        }
      });
    } catch (error) {
      showToast('Failed to access camera/microphone', 'error');
    }
  };

  const endCall = () => {
    webRTCManager.current.disconnect();
    setLocalStream(null);
    setRemoteStreams(new Map());
  };

  const toggleAudio = () => {
    setAudioEnabled(newState);
    webRTCManager.current.toggleAudio(newState);
  };

  const toggleVideo = () => {
    setVideoEnabled(newState);
    webRTCManager.current.toggleVideo(newState);
  };

  return (
    <Box
      sx={{
        position: 'fixed',
        bottom: 16,
        right: 16,
        zIndex: 1000,
        width: expanded ? 400 : 60,
        transition: 'width 0.3s'
      }}
    >
        <Fab color="primary" onClick={() => setExpanded(true)}>
          <VideocamIcon />
        </Fab>
      )}

      {expanded && (
        <Paper elevation={3} sx={{ p: 2 }}>
          <Box display="flex" justifyContent="space-between" mb={2}>
            <Typography variant="h6">Video Chat</Typography>
            <IconButton size="small" onClick={() => setExpanded(false)}>
              <MinimizeIcon />
            </IconButton>
          </Box>

          {/* Local video */}
          {localStream && (
            <Box mb={2}>
              <video
                ref={localVideoRef}
                autoPlay
                muted
                style={{ width: '100%', borderRadius: 8 }}
              />
              <Typography variant="caption">You</Typography>
            </Box>
          )}

          {/* Remote videos */}
          <Box display="flex" flexDirection="column" gap={1} mb={2}>
            {Array.from(remoteStreams.entries()).map(([peerId, stream]) => (
              <RemoteVideo key={peerId} peerId={peerId} stream={stream} />
            ))}
          </Box>

          {/* Controls */}
          <Box display="flex" justifyContent="center" gap={1}>
              <Button variant="contained" onClick={startCall} startIcon={<CallIcon />}>
                Start Call
              </Button>
            ) : (
              <>
                <IconButton 
                  onClick={toggleAudio}
                  color={audioEnabled ? 'primary' : 'error'}
                >
                  {audioEnabled ? <MicIcon /> : <MicOffIcon />}
                </IconButton>
                
                <IconButton 
                  onClick={toggleVideo}
                  color={videoEnabled ? 'primary' : 'error'}
                >
                  {videoEnabled ? <VideocamIcon /> : <VideocamOffIcon />}
                </IconButton>
                
                <IconButton onClick={endCall} color="error">
                  <CallEndIcon />
                </IconButton>
              </>
            )}
          </Box>
        </Paper>
      )}
    </Box>
  );
}

function RemoteVideo({ peerId, stream }) {
  const videoRef = useRef(null);

  useEffect(() => {
    if (videoRef.current && stream) {
      videoRef.current.srcObject = stream;
    }
  }, [stream]);

  return (
    <Box>
      <video
        ref={videoRef}
        autoPlay
        style={{ width: '100%', borderRadius: 8 }}
      />
      <Typography variant="caption">Participant {peerId.slice(0, 8)}</Typography>
    </Box>
  );
}

3. Backend WebRTC Signaling

# backend/routes/socketio_handlers.py

@socketio.on('webrtc-offer')
@require_auth_socketio
def handle_webrtc_offer(data):
    """Relay a WebRTC offer to the peer named in ``data['targetPeer']``.

    The forwarded message carries the sender's ``request.sid`` as ``fromPeer``.
    Messages without a target or an offer are dropped.
    """
    if not isinstance(data, dict):
        return

    target_peer = data.get('targetPeer')
    offer = data.get('offer')

    # Flask-SocketIO's emit() addresses the *sender* when no room is given,
    # so a missing target would echo the offer back to its author.
    if not target_peer or offer is None:
        return

    # NOTE(review): peers are identified by request.sid here, so targetPeer
    # must be a sid (or a room the recipient joined) -- confirm the client
    # sends the same kind of identifier.
    emit('webrtc-offer', {
        'fromPeer': request.sid,
        'offer': offer
    }, room=target_peer)

@socketio.on('webrtc-answer')
@require_auth_socketio
def handle_webrtc_answer(data):
    """Relay a WebRTC answer to the peer named in ``data['targetPeer']``.

    The forwarded message carries the sender's ``request.sid`` as ``fromPeer``.
    Messages without a target or an answer are dropped.
    """
    if not isinstance(data, dict):
        return

    target_peer = data.get('targetPeer')
    answer = data.get('answer')

    # Flask-SocketIO's emit() addresses the *sender* when no room is given,
    # so a missing target would echo the answer back to its author.
    if not target_peer or answer is None:
        return

    emit('webrtc-answer', {
        'fromPeer': request.sid,
        'answer': answer
    }, room=target_peer)

@socketio.on('ice-candidate')
@require_auth_socketio
def handle_ice_candidate(data):
    """Relay an ICE candidate to the peer named in ``data['targetPeer']``.

    The forwarded message carries the sender's ``request.sid`` as ``fromPeer``.
    Messages without a target or a candidate are dropped.
    """
    if not isinstance(data, dict):
        return

    target_peer = data.get('targetPeer')
    candidate = data.get('candidate')

    # Flask-SocketIO's emit() addresses the *sender* when no room is given,
    # so a missing target would echo the candidate back to its author.
    if not target_peer or candidate is None:
        return

    emit('ice-candidate', {
        'fromPeer': request.sid,
        'candidate': candidate
    }, room=target_peer)

@socketio.on('join_room')
@require_auth_socketio
def handle_join_room(data):
    """Add the caller to ``data['roomId']`` and announce it to the room.

    Everyone else in the room receives ``peer-joined`` with the caller's
    ``request.sid``. Requests without a room id are ignored.
    """
    if not isinstance(data, dict):
        return

    room_id = data.get('roomId')
    # Joining or broadcasting with an empty room id would not target the
    # intended room, so refuse it up front.
    if not room_id:
        return

    join_room(room_id)

    # Notify others in room
    emit('peer-joined', {
        'peerId': request.sid
    }, room=room_id, skip_sid=request.sid)

4. Screen Sharing Feature

// Add to WebRTCManager
async startScreenShare() {
  try {
    const screenStream = await navigator.mediaDevices.getDisplayMedia({
      video: { cursor: 'always' },
      audio: false
    });

    // Replace video track in all peer connections
    const videoTrack = screenStream.getVideoTracks()[0];
    
    this.peerConnections.forEach((pc) => {
      const sender = pc.getSenders().find(s => s.track?.kind === 'video');
      if (sender) {
        sender.replaceTrack(videoTrack);
      }
    });

    // Handle screen share stop
    videoTrack.onended = () => {
      this.stopScreenShare();
    };

    return screenStream;
  } catch (error) {
    console.error('Error sharing screen:', error);
    throw error;
  }
}

Files to Create/Modify

Frontend:

  • frontend/src/services/WebRTCManager.js ⭐ (NEW)
  • frontend/src/components/VideoChat.jsx ⭐ (NEW)
  • frontend/src/components/VoiceChat.jsx ⭐ (NEW)
  • frontend/src/components/Room.jsx (MODIFY - integrate video)

Backend:

  • backend/routes/socketio_handlers.py (MODIFY - add WebRTC signaling)

Benefits

  • Seamless collaboration experience
  • No external tools needed
  • Context-aware communication
  • Screen sharing for presentations
  • Groundwork for session recording (planned; see Future Enhancements)
  • Lower barrier to entry

Future Enhancements

  • Call recording
  • Transcription/captions
  • Breakout rooms
  • Hand raising
  • Reactions/emojis
  • Blur background
  • Virtual backgrounds
  • Spatial audio

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions