No images in this repository’s iceberg at this time
Download raw (23.5 KB)
#!/usr/bin/env python2
# svt.py -- sound visualization tool
# Copyright (C) 2009 Luis J. Villanueva
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#
# Renamed to svt to keep separate from the wav2png.py script
#
# Based on wav2png.py by Bram de Jong <bram.dejong at domain.com where domain in gmail>
# From http://freesound.iua.upf.edu/blog/?p=10
#
"""Draw a spectrogram (and optionally a waveform) image of an audio file.

The code is written so that it parses and behaves the same under Python 2
and Python 3: ``print`` is always called with a single string and integer
division is always spelled ``//``.
"""

import math
import optparse
import sys

_AUDIOLAB_HELP = "\nAudiolab is not installed. \n Download from http://pypi.python.org/pypi/scikits.audiolab/0.8\n"
_PIL_HELP = "\nThe Python Imaging Library is not installed. \n To install in Ubuntu use: \n sudo apt-get install python-imaging\n"
_NUMPY_HELP = "\nNumpy is not installed. \n To install in Ubuntu use: \n sudo apt-get install python-numpy\n"

# audiolab and PIL are only needed to decode files and to draw images, so a
# missing package is remembered here and reported (see _require) when the
# feature is first used or when the script starts, instead of killing every
# importer of this module.
try:
    import scikits.audiolab as audiolab
except ImportError:
    audiolab = None

try:
    try:
        # historical PIL layout
        import Image
        import ImageFilter
        import ImageChops
        import ImageDraw
        import ImageColor
    except ImportError:
        # packaged layout (PIL >= 1.1.6 eggs, Pillow)
        from PIL import Image
        from PIL import ImageFilter
        from PIL import ImageChops
        from PIL import ImageDraw
        from PIL import ImageColor
except ImportError:
    Image = ImageFilter = ImageChops = ImageDraw = ImageColor = None

# numpy is used by every class below, so its absence is fatal right away.
try:
    import numpy
except ImportError:
    print(_NUMPY_HELP)
    sys.exit(1)


def _require(module, help_text):
    """Print help_text and exit with status 1 if module failed to import."""
    if module is None:
        print(help_text)
        sys.exit(1)


# Colour stops for each value of the --palette option; interpolate_colors()
# expands a list of stops into a 256 entry lookup table.
_PALETTES = {
    # colour, dark background
    1: [
        (0, 0, 0), (14, 17, 16), (40, 50, 76), (90, 180, 100),
        (224, 224, 44), (255, 60, 30), (255, 255, 255),
    ],
    # colour, white background
    2: [
        (255, 255, 255), (255, 255, 255), (255, 255, 255), (225, 248, 255),
        (210, 241, 255), (195, 232, 255), (180, 221, 255), (165, 208, 255),
        (150, 193, 255), (135, 175, 255), (120, 156, 255), (105, 134, 255),
        (90, 110, 255), (75, 85, 255), (63, 60, 255), (64, 45, 255),
        (66, 30, 255), (76, 0, 255), (0, 128, 13), (8, 138, 0),
        (20, 143, 0), (33, 148, 0), (46, 153, 0), (60, 158, 0),
        (91, 168, 0), (108, 173, 0), (125, 179, 0), (143, 184, 0),
        (162, 189, 0), (182, 194, 0), (199, 195, 0), (204, 184, 0),
        (255, 230, 128), (255, 221, 119), (255, 213, 111), (255, 203, 102),
        (255, 193, 94), (255, 181, 85), (255, 169, 77), (255, 157, 68),
        (255, 143, 60), (255, 129, 51), (255, 113, 43), (255, 97, 34),
        (255, 81, 25), (255, 63, 17), (255, 45, 8), (255, 26, 0),
    ],
    # grayscale, white background
    3: [(255, 255, 255), (0, 0, 0)],
    # grayscale, black background
    4: [(0, 0, 0), (255, 255, 255)],
}


def _palette_colors(palette):
    """Return the list of (r, g, b) colour stops for a palette number.

    Raises ValueError for a number that is not a key of _PALETTES.
    """
    try:
        return _PALETTES[palette]
    except KeyError:
        raise ValueError("unknown palette %r, expected one of %s"
                         % (palette, sorted(_PALETTES)))


class TestAudioFile(object):
    """A class that mimics audiolab.sndfile but generates noise instead of reading
    a wave file. Additionally it can be told to have a "broken" header and thus crashing
    in the middle of the file. Also useful for testing ultra-short files of 20 samples."""

    def __init__(self, num_frames, has_broken_header=False):
        self.seekpoint = 0
        self.num_frames = num_frames
        self.has_broken_header = has_broken_header

    def seek(self, seekpoint):
        self.seekpoint = seekpoint

    def get_nframes(self):
        return self.num_frames

    def get_samplerate(self):
        return 44100

    def get_channels(self):
        return 1

    def read_frames(self, frames_to_read):
        """Return up to frames_to_read random samples in [-1, 1).

        Raises IOError once a read would go past the middle of the file
        when has_broken_header is set.
        """
        if self.has_broken_header and self.seekpoint + frames_to_read > self.num_frames // 2:
            raise IOError()

        num_frames_left = self.num_frames - self.seekpoint
        will_read = min(num_frames_left, frames_to_read)
        self.seekpoint += will_read
        return numpy.random.random(will_read) * 2 - 1


class AudioProcessor(object):
    """Reads windows of samples from an audio file object and analyses them.

    audio_file must offer get_nframes(), get_samplerate(), get_channels(),
    seek() and read_frames(), as TestAudioFile does.
    """

    def __init__(self, audio_file, fft_size, channel, window_function=numpy.ones):
        self.fft_size = fft_size
        self.window = window_function(self.fft_size)
        self.audio_file = audio_file
        self.frames = audio_file.get_nframes()
        self.samplerate = audio_file.get_samplerate()
        self.channels = audio_file.get_channels()
        # bin indices 0..fft_size/2, built on first use by spectral_centroid()
        self.spectrum_range = None
        # the spectral centroid is clipped to [lower, higher] before being
        # mapped onto a log scale between 0 and 1
        self.lower = 100
        self.higher = 22050
        self.lower_log = math.log10(self.lower)
        self.higher_log = math.log10(self.higher)
        self.clip = lambda val, low, high: min(high, max(low, val))
        # 1-based channel to analyse when the file has more than one channel
        self.channel = channel

    def read(self, start, size, resize_if_less=False):
        """ read size samples starting at start, if resize_if_less is True and less than size
        samples are read, resize the array to size and fill with zeros

        start may be negative (the first FFT window is centered around frame
        zero) and start + size may run past the end of the file. A window that
        lies completely outside the file gives zeros(size) when resize_if_less
        is set and an empty array otherwise. Raises ValueError when the file
        has several channels and self.channel is not one of them.
        """
        empty = numpy.zeros(size) if resize_if_less else numpy.array([])

        # number of zeros to add to start and end of the buffer
        add_to_start = 0
        add_to_end = 0

        if start < 0:
            if size + start <= 0:
                return empty
            self.audio_file.seek(0)
            add_to_start = -start  # remember: start is negative!
            to_read = size + start
            if to_read > self.frames:
                add_to_end = to_read - self.frames
                to_read = self.frames
        else:
            if start >= self.frames:
                # nothing left to read; never ask the file for a negative count
                return empty
            self.audio_file.seek(start)
            to_read = min(size, self.frames - start)
            add_to_end = size - to_read

        try:
            samples = self.audio_file.read_frames(to_read)
        except IOError:
            # this can happen for wave files with broken headers...
            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)

        # select which channel to draw
        if self.channels > 1:
            if not 1 <= self.channel <= self.channels:
                raise ValueError("channel %r is not available in a file with %d channels"
                                 % (self.channel, self.channels))
            samples = samples[:, self.channel - 1]

        if resize_if_less:
            if add_to_start > 0:
                samples = numpy.concatenate((numpy.zeros(add_to_start), samples))
            if add_to_end > 0:
                samples = numpy.concatenate((samples, numpy.zeros(add_to_end)))

        return samples

    def spectral_centroid(self, seek_point, spec_range=120.0):
        """ starting at seek_point read fft_size samples, and calculate the spectral centroid

        Returns (spectral_centroid, db_spectrum): the centroid mapped onto
        0..1 (0 when the window holds no energy) and the magnitude spectrum
        of fft_size // 2 + 1 bins in dB, scaled from [-spec_range, 0] to 0..1.
        """
        samples = self.read(seek_point - self.fft_size // 2, self.fft_size, True)
        # not in place: leaves the array returned by the file untouched
        samples = samples * self.window

        fft = numpy.fft.fft(samples)
        # normalized abs(FFT) between 0 and 1
        spectrum = numpy.abs(fft[:fft.shape[0] // 2 + 1]) / float(self.fft_size)
        length = numpy.float64(spectrum.shape[0])

        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
        db_spectrum = ((20 * numpy.log10(spectrum + 1e-30)).clip(-spec_range, 0.0) + spec_range) / spec_range

        energy = spectrum.sum()
        spectral_centroid = 0

        if energy > 1e-20:
            # calculate the spectral centroid
            if self.spectrum_range is None:
                self.spectrum_range = numpy.arange(length)

            spectral_centroid = ((spectrum * self.spectrum_range).sum()
                                 / (energy * (length - 1)) * self.samplerate * 0.5)

            # clip > log10 > scale between 0 and 1
            clipped = self.clip(spectral_centroid, self.lower, self.higher)
            spectral_centroid = (math.log10(clipped) - self.lower_log) / (self.higher_log - self.lower_log)

        return (spectral_centroid, db_spectrum)

    def peaks(self, start_seek, end_seek):
        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
        in that range. Returns that pair in the order they were found. So if min was found first,
        it returns (min, max) else the other way around.

        Returns (0.0, 0.0) when no sample could be read at all.
        """
        # larger blocksizes are faster but take more mem...
        # Aha, Watson, a clue, a tradeof!
        block_size = 4096

        if end_seek > self.frames:
            end_seek = self.frames

        if block_size > end_seek - start_seek:
            block_size = end_seek - start_seek

        if block_size <= 2:
            samples = self.read(start_seek, max(block_size, 1))
            if len(samples) == 0:
                return 0.0, 0.0
            if block_size == 2 and len(samples) > 1:
                return samples[0], samples[1]
            return samples[0], samples[0]

        min_value = max_value = None
        min_index = max_index = -1

        for block_start in range(start_seek, end_seek, block_size):
            samples = self.read(block_start, block_size)
            if len(samples) == 0:
                continue

            # argmax/argmin are relative to the block; add block_start so
            # positions found in different blocks can be compared below
            local_max_index = int(numpy.argmax(samples))
            if max_value is None or samples[local_max_index] > max_value:
                max_value = samples[local_max_index]
                max_index = block_start + local_max_index

            local_min_index = int(numpy.argmin(samples))
            if min_value is None or samples[local_min_index] < min_value:
                min_value = samples[local_min_index]
                min_index = block_start + local_min_index

        if max_value is None:
            return 0.0, 0.0

        return (min_value, max_value) if min_index < max_index else (max_value, min_value)


def interpolate_colors(colors, flat=False, num_colors=256):
    """ given a list of colors, create a larger list of colors by linearly
    interpolating between neighbouring entries. If flat is True a flat list of
    numbers [r, g, b, r, g, b, ...] is returned. If False, a list of (r,g,b)
    tuples. num_colors is the number of colors wanted in the final list.

    Raises ValueError when colors is empty.
    """
    if not colors:
        raise ValueError("colors must contain at least one (r, g, b) entry")

    last = len(colors) - 1
    palette = []

    for i in range(num_colors):
        # position of entry i on the 0..last axis of the input list
        position = (i * last) / (num_colors - 1.0) if num_colors > 1 else 0.0
        low = int(position)
        alpha = position - float(low)
        # alpha is 0 whenever low is the last entry; min() only keeps the index valid
        high = min(low + 1, last)

        rgb = tuple(int((1.0 - alpha) * colors[low][c] + alpha * colors[high][c])
                    for c in range(3))

        if flat:
            palette.extend(rgb)
        else:
            palette.append(rgb)

    return palette


class WaveformImage(object):
    """RGBA image of the waveform, one column of peaks per call to draw_peaks()."""

    def __init__(self, image_width, image_height, palette):
        _require(Image, _PIL_HELP)
        self.image = Image.new("RGBA", (image_width, image_height))
        self.image_width = image_width
        self.image_height = image_height
        self.draw = ImageDraw.Draw(self.image)
        self.previous_x, self.previous_y = None, None

        colors = _palette_colors(palette)
        # this line gets the old "screaming" colors back...
        # colors = [self.color_from_value(value/29.0) for value in range(0,30)]
        self.color_lookup = interpolate_colors(colors)
        self.pix = self.image.load()

    def color_from_value(self, value):
        """ given a value between 0 and 1, return an (r,g,b) tuple """
        return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int((1.0 - value) * 360), 80, 50))

    def draw_peaks(self, x, peaks, spectral_centroid):
        """ draw 2 peaks at x using the spectral_centroid for color """
        y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
        y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5

        # keep the index inside the 256 entry lookup table
        color_index = min(255, max(0, int(spectral_centroid * 255.0)))
        line_color = self.color_lookup[color_index]

        if self.previous_y is not None:
            # connect to the end of the previous column
            self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
        else:
            self.draw.line([x, y1, x, y2], line_color)

        self.previous_x, self.previous_y = x, y2

        self.draw_anti_aliased_pixels(x, y1, y2, line_color)

    def _blend_pixel(self, x, y, alpha, color):
        """Mix color into pixel (x, y) with weight alpha; skipped outside the image."""
        if not (0 <= x < self.image_width and 0 <= y < self.image_height):
            return
        current_pix = self.pix[x, y]
        self.pix[x, y] = tuple(int((1 - alpha) * current_pix[c] + alpha * color[c])
                               for c in range(3))

    def draw_anti_aliased_pixels(self, x, y1, y2, color):
        """ vertical anti-aliasing at y1 and y2 """
        # pixel just below the lower end of the line
        y_max = max(y1, y2)
        y_max_int = int(y_max)
        alpha = y_max - y_max_int
        if 0.0 < alpha < 1.0:
            self._blend_pixel(x, y_max_int + 1, alpha, color)

        # pixel just above the upper end of the line
        y_min = min(y1, y2)
        y_min_int = int(y_min)
        alpha = 1.0 - (y_min - y_min_int)
        if 0.0 < alpha < 1.0:
            self._blend_pixel(x, y_min_int - 1, alpha, color)

    def save(self, filename):
        """Brighten the middle row as a zero line and write the image to filename."""
        # draw a zero "zero" line
        a = 25
        middle = self.image_height // 2
        for x in range(self.image_width):
            # min() keeps already bright pixels inside the 8 bit range
            self.pix[x, middle] = tuple(min(255, p + a) for p in self.pix[x, middle])

        self.image.save(filename)


class SpectrogramImage(object):
    """Palette image of the spectrogram, one column per call to draw_spectrum()."""

    def __init__(self, image_width, image_height, fft_size, f_max, f_min, nyquist_freq, palette):
        _require(Image, _PIL_HELP)
        # width and height are swapped on purpose: columns are appended as
        # rows of pixel data and save() rotates the result by 90 degrees
        self.image = Image.new("P", (image_height, image_width))
        self.image_width = image_width
        self.image_height = image_height
        self.fft_size = fft_size
        self.f_max = f_max
        self.f_min = f_min
        self.nyquist_freq = nyquist_freq
        self.palette = palette

        if nyquist_freq < f_max:
            print("\nWarning: The specified maximum frequency to draw (%d Hz) is higher that what the digital file allows, which is %d Hz. The image file will have black areas on top that correspond to empty data.\n" % (f_max, nyquist_freq))

        self.image.putpalette(interpolate_colors(_palette_colors(palette), True))

        # generate the lookup which translates y-coordinate to fft-bin
        self.y_to_bin = []
        half_fft = self.fft_size // 2

        for y in range(self.image_height):
            fraction = y / (image_height - 1.0) if image_height > 1 else 0.0
            # log scale
            # freq = math.pow(10.0, math.log10(f_min) + fraction * (math.log10(f_max) - math.log10(f_min)))
            # arithmetic scale
            freq = f_min + fraction * (f_max - f_min)
            # uses the nyquist frequency to allow files of different sampling rate
            fft_bin = freq / nyquist_freq * (half_fft + 1)
            # fft_bin = freq / 22050.0 * (self.fft_size/2 + 1)

            # rows above the last usable bin are left out here and are
            # filled with 0 by draw_spectrum()
            if fft_bin < half_fft:
                alpha = fft_bin - int(fft_bin)
                self.y_to_bin.append((int(fft_bin), alpha * 255))

        # this is a bit strange, but using image.load()[x,y] = ... is
        # a lot slower than using image.putadata and then rotating the image
        # so we store all the pixels in an array and then create the image when saving
        self.pixels = []

    def draw_spectrum(self, x, spectrum):
        """Append one column of palette indices computed from spectrum.

        x is accepted for symmetry with WaveformImage.draw_peaks but not
        used: columns are stored in the order in which they are drawn.
        """
        for (index, alpha) in self.y_to_bin:
            # linear interpolation between two neighbouring bins, scaled to 0..255
            self.pixels.append(int((255.0 - alpha) * spectrum[index] + alpha * spectrum[index + 1]))

        for _ in range(len(self.y_to_bin), self.image_height):
            self.pixels.append(0)

    def save(self, filename):
        """Turn the collected columns into the image and write it to filename."""
        self.image.putdata(self.pixels)
        self.image.transpose(Image.ROTATE_90).save(filename)


def create_png(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, f_max, f_min, wavefile, palette, channel):
    """Render input_filename to a spectrogram image and optionally a waveform image.

    The spectrogram is written to output_filename_s. The waveform is only
    drawn, and written to output_filename_w, when wavefile is 1. Progress
    is reported on standard output.
    """
    _require(audiolab, _AUDIOLAB_HELP)

    sys.stdout.write("processing file %s:\n\t" % input_filename)

    audio_file = audiolab.sndfile(input_filename, 'read')
    try:
        samples_per_pixel = audio_file.get_nframes() / float(image_width)
        nyquist_freq = audio_file.get_samplerate() / 2.0
        processor = AudioProcessor(audio_file, fft_size, channel, numpy.hanning)

        draw_waveform = (wavefile == 1)
        if draw_waveform:
            waveform = WaveformImage(image_width, image_height, palette)

        spectrogram = SpectrogramImage(image_width, image_height, fft_size, f_max, f_min, nyquist_freq, palette)

        # one progress dot per tenth of the image; at least 1 so that images
        # narrower than 10 pixels do not divide by zero
        progress_step = max(1, image_width // 10)

        for x in range(image_width):
            if x % progress_step == 0:
                sys.stdout.write('.')
                sys.stdout.flush()

            seek_point = int(x * samples_per_pixel)
            next_seek_point = int((x + 1) * samples_per_pixel)

            (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)

            if draw_waveform:
                peaks = processor.peaks(seek_point, next_seek_point)
                waveform.draw_peaks(x, peaks, spectral_centroid)

            spectrogram.draw_spectrum(x, db_spectrum)

        if draw_waveform:
            waveform.save(output_filename_w)

        spectrogram.save(output_filename_s)
    finally:
        # TestAudioFile style stand-ins have no close()
        close = getattr(audio_file, 'close', None)
        if close is not None:
            close()

    print(" done")


def _build_option_parser():
    """Return the optparse parser of the command line interface."""
    # conflict_handler="resolve" lets -h mean --height instead of --help
    parser = optparse.OptionParser("usage: %prog [options] input-filename", conflict_handler="resolve")
    parser.add_option("-a", "--waveout", action="store", dest="output_filename_w", type="string", help="output waveform image (default input filename + _w.png)")
    parser.add_option("-o", "--wavefile", action="store", dest="wavefile", type="int", help="draw waveform image (yes:1, no: 0; default: no)")
    parser.add_option("-s", "--specout", action="store", dest="output_filename_s", type="string", help="output spectrogram image (default input filename + _s.png)")
    parser.add_option("-w", "--width", action="store", dest="image_width", type="int", help="image width in pixels (default %default)")
    parser.add_option("-h", "--height", action="store", dest="image_height", type="int", help="image height in pixels (default %default)")
    parser.add_option("-f", "--fft", action="store", dest="fft_size", type="int", help="fft size, power of 2 for increased performance (default %default)")
    parser.add_option("-m", "--fmax", action="store", dest="f_max", type="int", help="Maximum freq to draw, in Hz (default %default)")
    parser.add_option("-i", "--fmin", action="store", dest="f_min", type="int", help="Minimum freq to draw, in Hz (default %default)")
    parser.add_option("-p", "--palette", action="store", dest="palette", type="int", help="Which color palette to use to draw the spectrogram, 1 for dark background, 2 for white background, 3 for grayscale with white background or 4 for grayscale with black background (default %default)")
    parser.add_option("-c", "--channel", action="store", dest="channel", type="int", help="Which channel to draw in a stereo file, 1 for left or 2 for right (default %default)")
    parser.add_option("-v", "--version", action="store_true", dest="version", help="display version information")

    parser.set_defaults(output_filename_w=None, output_filename_s=None, image_width=500, image_height=170, fft_size=2048,
                        f_max=22050, f_min=10, wavefile=0, palette=1, channel=1)
    return parser


def main(argv=None):
    """Command line entry point; argv defaults to sys.argv[1:]."""
    # a script run without its libraries stops here, before parsing options
    _require(audiolab, _AUDIOLAB_HELP)
    _require(Image, _PIL_HELP)

    parser = _build_option_parser()
    (options, input_files) = parser.parse_args(argv)

    if options.version:
        print("\n svt version 0.4.2")
        return

    if len(input_files) == 0:
        parser.print_help()
        parser.error("not enough arguments")

    if len(input_files) > 1 and (options.output_filename_w is not None or options.output_filename_s is not None):
        parser.error("when processing multiple files you can't define the output filename!")

    if options.palette not in _PALETTES:
        parser.error("palette must be one of 1, 2, 3 or 4")

    # the right channel gets its own default file names
    suffix = "_r" if options.channel == 2 else ""

    # process all files so the user can use wildcards like *.wav
    for input_file in input_files:
        output_file_w = options.output_filename_w or input_file + "_w" + suffix + ".png"
        output_file_s = options.output_filename_s or input_file + "_s" + suffix + ".png"

        create_png(input_file, output_file_w, output_file_s, options.image_width, options.image_height,
                   options.fft_size, options.f_max, options.f_min, options.wavefile, options.palette,
                   options.channel)


if __name__ == '__main__':
    main()