Sunday, July 03, 2011

Audio output on Windows with pure Python + ctypes

I want to make a simple walkie-talkie for a LAN to have fun with my friends. I've created the Raw Audio Socket spec to keep it really simple. We use a lot of different operating systems, so it would be nice to have a single implementation that can run cross-platform. I chose Python and started by getting sound playing on Windows.

Python is known for its easy integration with C libraries and, indeed, there are a lot of Python bindings for audio libraries written in C. However, such a binding requires the library to be compiled for your specific platform. I chose to avoid dependencies on any C code and instead call the Windows WinMM Multimedia API directly with the ctypes module.

Thanks to an excellent tutorial by David Overton, here is a pure Python proof of concept that plays a standard CD Audio PCM 44.1kHz 16bit Stereo sample from an external file to the default sound device.

"""
Implementation of Raw Audio Socket server spec in pure Python
http://code.google.com/p/rainforce/wiki/RawAudioSocket

"""

import sys

#-- CHAPTER 1: CONTINUOUS SOUND PLAYBACK WITH WINDOWS WINMM LIBRARY --
#
# Based on tutorial "Playing Audio in Windows using waveOut Interface"
# by David Overton

import ctypes
from ctypes import wintypes


# 1. Open Sound Device

# --- define necessary data structures from mmsystem.h
# Type of the handle that waveOutOpen fills in for an opened output device.
HWAVEOUT = wintypes.HANDLE

# Device identifier that points to the default wave device.
WAVE_MAPPER = -1

# wFormatTag value for PCM audio.
WAVE_FORMAT_PCM = 1

# fdwOpen flag value passed to waveOutOpen together with a zero callback.
CALLBACK_NULL = 0

# Return code that every winmm call in this file is compared against.
MMSYSERR_NOERROR = 0

class WAVEFORMATEX(ctypes.Structure):
  """ctypes mirror of the WAVEFORMATEX structure from mmsystem.h.

  Describes the format of the waveform-audio data handed to the device.
  Fields are listed in declaration order, so the structure can be filled
  positionally.
  """
  _fields_ = [
    # Format tag:
    #   0x0001 WAVE_FORMAT_PCM, PCM audio
    #   0xFFFE the format is specified in WAVEFORMATEXTENSIBLE.SubFormat
    #   other values are listed in mmreg.h
    ('wFormatTag', wintypes.WORD),

    ('nChannels', wintypes.WORD),

    ('SamplesPerSec', wintypes.DWORD),

    # For WAVE_FORMAT_PCM: nSamplesPerSec multiplied by nBlockAlign.
    ('AvgBytesPerSec', wintypes.DWORD),

    # For WAVE_FORMAT_PCM: nChannels multiplied by wBitsPerSample,
    # divided by 8 (bits per byte).
    ('nBlockAlign', wintypes.WORD),

    # For WAVE_FORMAT_PCM: should be 8 or 16.
    ('wBitsPerSample', wintypes.WORD),

    # Size of the extra format information, should be 0.
    ('cbSize', wintypes.WORD),
  ]
# --- /define

# Data must be processed in pieces that are a multiple of
# nBlockAlign bytes at a time. Data written to and read
# from a device must always start at the beginning
# of a block. Playback of PCM data cannot be started in
# the middle of a sample on a non-block-aligned boundary.

# Empty handle; waveOutOpen stores the opened device handle in it.
hwaveout = HWAVEOUT()

# CD Audio format: PCM, 44.1 kHz, 16 bit, stereo.
wavefx = WAVEFORMATEX(
  WAVE_FORMAT_PCM,
  2,      # nChannels
  44100,  # SamplesPerSec
  176400, # AvgBytesPerSec = 44100 SamplesPerSec * 4 nBlockAlign (bytes)
  4,      # nBlockAlign = 2 nChannels * 16 wBitsPerSample / 8 bits per byte
  16,     # wBitsPerSample
  0       # cbSize, no extra format information
)

# Open default wave device
ret = ctypes.windll.winmm.waveOutOpen(
  ctypes.byref(hwaveout),  # out: receives the handle of the opened device
  WAVE_MAPPER,             # device id: the default wave device
  ctypes.byref(wavefx),    # format of the data that will be sent
  0,                       # DWORD_PTR dwCallback - callback mechanism, unused
  0,                       # DWORD_PTR dwCallbackInstance - callback user data
  CALLBACK_NULL            # DWORD fdwOpen - flag for opening the device
)

# Anything other than MMSYSERR_NOERROR means there is no usable device
# handle, so the script stops here with an error message.
if ret == MMSYSERR_NOERROR:
  print("Default Wave Audio output device is opened successfully")
else:
  sys.exit('Error opening default waveform audio device (WAVE_MAPPER)')


# 2. Write Audio Blocks to Device

# --- define necessary data structures
PVOID = wintypes.HANDLE
WAVERR_BASE = 32
# Returned by waveOutUnprepareHeader while the block is still being played.
WAVERR_STILLPLAYING = WAVERR_BASE + 1
class WAVEHDR(ctypes.Structure):
  """ctypes mirror of the WAVEHDR structure from mmsystem.h.

  Header that identifies one waveform-audio buffer handed to the device.

  dwUser and reserved are DWORD_PTR in the C declaration, i.e. pointer
  sized. They are declared as c_size_t so that dwFlags, dwLoops and lpNext
  keep the right offsets on 64-bit Windows as well as on 32-bit.
  """
  _fields_ = [
    ('lpData', wintypes.LPSTR), # pointer to waveform buffer
    ('dwBufferLength', wintypes.DWORD),  # in bytes
    ('dwBytesRecorded', wintypes.DWORD), # when used in input
    ('dwUser', ctypes.c_size_t),         # user data, DWORD_PTR
    ('dwFlags', wintypes.DWORD),
    ('dwLoops', wintypes.DWORD),  # times to loop, for output buffers only
    ('lpNext', PVOID),            # reserved, struct wavehdr_tag *lpNext
    ('reserved', ctypes.c_size_t)] # reserved, DWORD_PTR
# The lpData, dwBufferLength, and dwFlags members must be set before calling
# the waveInPrepareHeader or waveOutPrepareHeader function. (For either
# function, the dwFlags member must be set to zero.)
# --- /define

class AudioWriter(object):
  """Plays blocks of raw audio data on an opened waveOut device.

  One WAVEHDR is allocated per writer and reused for every write() call.
  write() blocks until the block it was given has been played.
  """

  def __init__(self, hwaveout):
    """Store the device handle and allocate the block header.

    hwaveout -- handle of an opened output device (as filled in by
                waveOutOpen)
    """
    self.hwaveout = hwaveout
    self.wavehdr = WAVEHDR()

  def write(self, data):
    """Play `data` on the device and return when playback has finished.

    data -- byte string with the audio data; its length is passed to the
            device as dwBufferLength

    Terminates the program through sys.exit() if preparing, writing or
    unpreparing the block fails.
    """
    import time

    winmm = ctypes.windll.winmm
    header = ctypes.byref(self.wavehdr)
    header_size = ctypes.sizeof(self.wavehdr)

    self.wavehdr.dwBufferLength = len(data)
    self.wavehdr.lpData = data
    # dwFlags must be zero before waveOutPrepareHeader. The header is
    # reused between calls, so clear whatever the previous playback left.
    self.wavehdr.dwFlags = 0

    # Prepare block for playback
    if winmm.waveOutPrepareHeader(
         self.hwaveout, header, header_size
       ) != MMSYSERR_NOERROR:
      sys.exit('Error: waveOutPrepareHeader failed')

    # Write block, returns immediately unless a synchronous driver is
    # used (not often)
    if winmm.waveOutWrite(
         self.hwaveout, header, header_size
       ) != MMSYSERR_NOERROR:
      sys.exit('Error: waveOutWrite failed')

    # [ ] calculate sleep delay based on sample length
    # iii [ ] Measure CPU usage spike during wait without delay
    time.sleep(1)

    # Wait until playback is finished
    while True:
      # unpreparing the header fails until the block is played
      ret = winmm.waveOutUnprepareHeader(self.hwaveout, header, header_size)
      if ret == WAVERR_STILLPLAYING:
        time.sleep(1)
        continue
      if ret != MMSYSERR_NOERROR:
        sys.exit('Error: waveOutUnprepareHeader failed with code 0x%x' % ret)
      break


# Play the sample file, then close the device. The device is closed in a
# finally clause so that it is released even when reading the file or
# playback fails; the file is closed by the with statement.
try:
  # [ ] it's no good to read all the PCM data into memory at once
  with open('95672__Corsica_S__frequency_change_approved.raw', 'rb') as pcmfile:
    data = pcmfile.read()

  aw = AudioWriter(hwaveout)
  aw.write(data)
finally:
  # x. Close Sound Device
  ctypes.windll.winmm.waveOutClose(hwaveout)

print("Default Wave Audio output device is closed")

#-- /CHAPTER 1 --

Windows also provides the DirectSound API, but it looks too complicated for me ATM. The code above is also available from https://bitbucket.org/techtonik/audiosocket, marked with the 0.1 tag. You may expect to find further modifications there.