From 34500d102b4b7f21f4fed596bed8c0b88c2f9e34 Mon Sep 17 00:00:00 2001 From: David Dietrich Date: Sat, 3 Jun 2017 12:11:24 -0700 Subject: [PATCH 1/4] Upgraded to Python 3. --- dejavu.py | 2 +- dejavu/__init__.py | 380 ++++++++++----------- dejavu/database.py | 3 +- dejavu/database_sql.py | 725 +++++++++++++++++++++-------------------- dejavu/decoder.py | 2 +- dejavu/fingerprint.py | 174 +++++----- dejavu/recognize.py | 176 +++++----- 7 files changed, 735 insertions(+), 727 deletions(-) diff --git a/dejavu.py b/dejavu.py index 0bb4d0c4..ee792f53 100755 --- a/dejavu.py +++ b/dejavu.py @@ -13,7 +13,7 @@ warnings.filterwarnings("ignore") -DEFAULT_CONFIG_FILE = "dejavu.cnf.SAMPLE" +DEFAULT_CONFIG_FILE = "dejavu.cnf" def init(configpath): diff --git a/dejavu/__init__.py b/dejavu/__init__.py index 4f6e6e8a..1321084e 100755 --- a/dejavu/__init__.py +++ b/dejavu/__init__.py @@ -1,202 +1,204 @@ from dejavu.database import get_database, Database import dejavu.decoder as decoder -import fingerprint +import dejavu.fingerprint as fingerprint import multiprocessing import os import traceback import sys - -class Dejavu(object): - - SONG_ID = "song_id" - SONG_NAME = 'song_name' - CONFIDENCE = 'confidence' - MATCH_TIME = 'match_time' - OFFSET = 'offset' - OFFSET_SECS = 'offset_seconds' - - def __init__(self, config): - super(Dejavu, self).__init__() - - self.config = config - - # initialize db - db_cls = get_database(config.get("database_type", None)) - - self.db = db_cls(**config.get("database", {})) - self.db.setup() - - # if we should limit seconds fingerprinted, - # None|-1 means use entire track - self.limit = self.config.get("fingerprint_limit", None) - if self.limit == -1: # for JSON compatibility - self.limit = None - self.get_fingerprinted_songs() - - def get_fingerprinted_songs(self): - # get songs previously indexed - self.songs = self.db.get_songs() - self.songhashes_set = set() # to know which ones we've computed before - for song in self.songs: - song_hash = 
song[Database.FIELD_FILE_SHA1] - self.songhashes_set.add(song_hash) - - def fingerprint_directory(self, path, extensions, nprocesses=None): - # Try to use the maximum amount of processes if not given. - try: - nprocesses = nprocesses or multiprocessing.cpu_count() - except NotImplementedError: - nprocesses = 1 - else: - nprocesses = 1 if nprocesses <= 0 else nprocesses - - pool = multiprocessing.Pool(nprocesses) - - filenames_to_fingerprint = [] - for filename, _ in decoder.find_files(path, extensions): - - # don't refingerprint already fingerprinted files - if decoder.unique_hash(filename) in self.songhashes_set: - print "%s already fingerprinted, continuing..." % filename - continue - - filenames_to_fingerprint.append(filename) - - # Prepare _fingerprint_worker input - worker_input = zip(filenames_to_fingerprint, - [self.limit] * len(filenames_to_fingerprint)) - - # Send off our tasks - iterator = pool.imap_unordered(_fingerprint_worker, - worker_input) - - # Loop till we have all of them - while True: - try: - song_name, hashes, file_hash = iterator.next() - except multiprocessing.TimeoutError: - continue - except StopIteration: - break - except: - print("Failed fingerprinting") - # Print traceback because we can't reraise it here - traceback.print_exc(file=sys.stdout) - else: - sid = self.db.insert_song(song_name, file_hash) - - self.db.insert_hashes(sid, hashes) - self.db.set_song_fingerprinted(sid) - self.get_fingerprinted_songs() - - pool.close() - pool.join() - - def fingerprint_file(self, filepath, song_name=None): - songname = decoder.path_to_songname(filepath) - song_hash = decoder.unique_hash(filepath) - song_name = song_name or songname - # don't refingerprint already fingerprinted files - if song_hash in self.songhashes_set: - print "%s already fingerprinted, continuing..." 
% song_name - else: - song_name, hashes, file_hash = _fingerprint_worker( - filepath, - self.limit, - song_name=song_name - ) - sid = self.db.insert_song(song_name, file_hash) - - self.db.insert_hashes(sid, hashes) - self.db.set_song_fingerprinted(sid) - self.get_fingerprinted_songs() - - def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS): - hashes = fingerprint.fingerprint(samples, Fs=Fs) - return self.db.return_matches(hashes) - - def align_matches(self, matches): - """ - Finds hash matches that align in time with other matches and finds - consensus about which hashes are "true" signal from the audio. - - Returns a dictionary with match information. - """ - # align by diffs - diff_counter = {} - largest = 0 - largest_count = 0 - song_id = -1 - for tup in matches: - sid, diff = tup - if diff not in diff_counter: - diff_counter[diff] = {} - if sid not in diff_counter[diff]: - diff_counter[diff][sid] = 0 - diff_counter[diff][sid] += 1 - - if diff_counter[diff][sid] > largest_count: - largest = diff - largest_count = diff_counter[diff][sid] - song_id = sid - - # extract idenfication - song = self.db.get_song_by_id(song_id) - if song: - # TODO: Clarify what `get_song_by_id` should return. 
- songname = song.get(Dejavu.SONG_NAME, None) - else: - return None - - # return match info - nseconds = round(float(largest) / fingerprint.DEFAULT_FS * - fingerprint.DEFAULT_WINDOW_SIZE * - fingerprint.DEFAULT_OVERLAP_RATIO, 5) - song = { - Dejavu.SONG_ID : song_id, - Dejavu.SONG_NAME : songname, - Dejavu.CONFIDENCE : largest_count, - Dejavu.OFFSET : int(largest), - Dejavu.OFFSET_SECS : nseconds, - Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None),} - return song - - def recognize(self, recognizer, *options, **kwoptions): - r = recognizer(self) - return r.recognize(*options, **kwoptions) +class Dejavu(): + + SONG_ID = "song_id" + SONG_NAME = 'song_name' + CONFIDENCE = 'confidence' + MATCH_TIME = 'match_time' + OFFSET = 'offset' + OFFSET_SECS = 'offset_seconds' + + def __init__(self, config): + super(Dejavu, self).__init__() + + self.config = config + + # initialize db + db_cls = get_database(config.get("database_type", None)) + + self.db = db_cls(**config.get("database", {})) + self.db.setup() + + # if we should limit seconds fingerprinted, + # None|-1 means use entire track + self.limit = self.config.get("fingerprint_limit", None) + if self.limit == -1: # for JSON compatibility + self.limit = None + self.get_fingerprinted_songs() + + def get_fingerprinted_songs(self): + # get songs previously indexed + self.songs = self.db.get_songs() + self.songhashes_set = set() # to know which ones we've computed before + + for song in self.songs: + #print('song', song) + song_hash = song[Database.FIELD_FILE_SHA1] + self.songhashes_set.add(song_hash) + + def fingerprint_directory(self, path, extensions, nprocesses=None): + # Try to use the maximum amount of processes if not given. 
+ try: + nprocesses = nprocesses or multiprocessing.cpu_count() + except NotImplementedError: + nprocesses = 1 + else: + nprocesses = 1 if nprocesses <= 0 else nprocesses + + pool = multiprocessing.Pool(nprocesses) + + filenames_to_fingerprint = [] + for filename, _ in decoder.find_files(path, extensions): + + # don't refingerprint already fingerprinted files + if decoder.unique_hash(filename) in self.songhashes_set: + print("%s already fingerprinted, continuing..." % filename) + continue + + filenames_to_fingerprint.append(filename) + + # Prepare _fingerprint_worker input + worker_input = zip(filenames_to_fingerprint, + [self.limit] * len(filenames_to_fingerprint)) + + # Send off our tasks + iterator = pool.imap_unordered(_fingerprint_worker, + worker_input) + + # Loop till we have all of them + while True: + try: + song_name, hashes, file_hash = iterator.next() + #print('hashes', hashes) + except multiprocessing.TimeoutError: + continue + except StopIteration: + break + except: + print("Failed fingerprinting") + # Print traceback because we can't reraise it here + traceback.print_exc(file=sys.stdout) + else: + sid = self.db.insert_song(song_name, file_hash) + + self.db.insert_hashes(sid, hashes) + self.db.set_song_fingerprinted(sid) + self.get_fingerprinted_songs() + + pool.close() + pool.join() + + def fingerprint_file(self, filepath, song_name=None): + songname = decoder.path_to_songname(filepath) + song_hash = decoder.unique_hash(filepath) + song_name = song_name or songname + # don't refingerprint already fingerprinted files + if song_hash in self.songhashes_set: + print("%s already fingerprinted, continuing..." 
% song_name) + else: + song_name, hashes, file_hash = _fingerprint_worker( + filepath, + self.limit, + song_name=song_name + ) + sid = self.db.insert_song(song_name, file_hash) + + self.db.insert_hashes(sid, hashes) + self.db.set_song_fingerprinted(sid) + self.get_fingerprinted_songs() + + def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS): + hashes = fingerprint.fingerprint(samples, Fs=Fs) + return self.db.return_matches(hashes) + + def align_matches(self, matches): + """ + Finds hash matches that align in time with other matches and finds + consensus about which hashes are "true" signal from the audio. + + Returns a dictionary with match information. + """ + # align by diffs + diff_counter = {} + largest = 0 + largest_count = 0 + song_id = -1 + for tup in matches: + sid, diff = tup + if diff not in diff_counter: + diff_counter[diff] = {} + if sid not in diff_counter[diff]: + diff_counter[diff][sid] = 0 + diff_counter[diff][sid] += 1 + + if diff_counter[diff][sid] > largest_count: + largest = diff + largest_count = diff_counter[diff][sid] + song_id = sid + + # extract idenfication + song = self.db.get_song_by_id(song_id) + if song: + # TODO: Clarify what `get_song_by_id` should return. + songname = song.get(Dejavu.SONG_NAME, None) + else: + return None + + # return match info + nseconds = round(float(largest) / fingerprint.DEFAULT_FS * + fingerprint.DEFAULT_WINDOW_SIZE * + fingerprint.DEFAULT_OVERLAP_RATIO, 5) + song = { + Dejavu.SONG_ID : song_id, + Dejavu.SONG_NAME : songname, + Dejavu.CONFIDENCE : largest_count, + Dejavu.OFFSET : int(largest), + Dejavu.OFFSET_SECS : nseconds, + Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None),} + return song + + def recognize(self, recognizer, *options, **kwoptions): + r = recognizer(self) + return r.recognize(*options, **kwoptions) def _fingerprint_worker(filename, limit=None, song_name=None): - # Pool.imap sends arguments as tuples so we have to unpack - # them ourself. 
- try: - filename, limit = filename - except ValueError: - pass - - songname, extension = os.path.splitext(os.path.basename(filename)) - song_name = song_name or songname - channels, Fs, file_hash = decoder.read(filename, limit) - result = set() - channel_amount = len(channels) - - for channeln, channel in enumerate(channels): - # TODO: Remove prints or change them into optional logging. - print("Fingerprinting channel %d/%d for %s" % (channeln + 1, - channel_amount, - filename)) - hashes = fingerprint.fingerprint(channel, Fs=Fs) - print("Finished channel %d/%d for %s" % (channeln + 1, channel_amount, - filename)) - result |= set(hashes) - - return song_name, result, file_hash + # Pool.imap sends arguments as tuples so we have to unpack + # them ourself. + try: + filename, limit = filename + except ValueError: + pass + + songname, extension = os.path.splitext(os.path.basename(filename)) + song_name = song_name or songname + channels, Fs, file_hash = decoder.read(filename, limit) + result = set() + channel_amount = len(channels) + + for channeln, channel in enumerate(channels): + # TODO: Remove prints or change them into optional logging. + print("Fingerprinting channel %d/%d for %s" % (channeln + 1, + channel_amount, + filename)) + hashes = fingerprint.fingerprint(channel, Fs=Fs) + print("Finished channel %d/%d for %s" % (channeln + 1, channel_amount, + filename)) + result |= set(hashes) + + return song_name, result, file_hash def chunkify(lst, n): - """ - Splits a list into roughly n equal parts. - http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts - """ - return [lst[i::n] for i in xrange(n)] + """ + Splits a list into roughly n equal parts. 
+ http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts + """ + return [lst[i::n] for i in xrange(n)] diff --git a/dejavu/database.py b/dejavu/database.py index e5732ff0..a7410d4b 100755 --- a/dejavu/database.py +++ b/dejavu/database.py @@ -1,8 +1,7 @@ from __future__ import absolute_import import abc - -class Database(object): +class Database(): __metaclass__ = abc.ABCMeta FIELD_FILE_SHA1 = 'file_sha1' diff --git a/dejavu/database_sql.py b/dejavu/database_sql.py index 031bdcb4..2bd5adc0 100755 --- a/dejavu/database_sql.py +++ b/dejavu/database_sql.py @@ -1,373 +1,378 @@ from __future__ import absolute_import -from itertools import izip_longest -import Queue +from itertools import zip_longest +import queue -import MySQLdb as mysql -from MySQLdb.cursors import DictCursor +from mysql.connector import (connection) +from mysql.connector import (cursor) from dejavu.database import Database class SQLDatabase(Database): - """ - Queries: - - 1) Find duplicates (shouldn't be any, though): - - select `hash`, `song_id`, `offset`, count(*) cnt - from fingerprints - group by `hash`, `song_id`, `offset` - having cnt > 1 - order by cnt asc; - - 2) Get number of hashes by song: - - select song_id, song_name, count(song_id) as num - from fingerprints - natural join songs - group by song_id - order by count(song_id) desc; - - 3) get hashes with highest number of collisions - - select - hash, - count(distinct song_id) as n - from fingerprints - group by `hash` - order by n DESC; - - => 26 different songs with same fingerprint (392 times): - - select songs.song_name, fingerprints.offset - from fingerprints natural join songs - where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73"; - """ - - type = "mysql" - - # tables - FINGERPRINTS_TABLENAME = "fingerprints" - SONGS_TABLENAME = "songs" - - # fields - FIELD_FINGERPRINTED = "fingerprinted" - - # creates - CREATE_FINGERPRINTS_TABLE = """ - CREATE TABLE IF NOT EXISTS 
`%s` ( - `%s` binary(10) not null, - `%s` mediumint unsigned not null, - `%s` int unsigned not null, - INDEX (%s), - UNIQUE KEY `unique_constraint` (%s, %s, %s), - FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE - ) ENGINE=INNODB;""" % ( - FINGERPRINTS_TABLENAME, Database.FIELD_HASH, - Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, - Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, - Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID - ) - - CREATE_SONGS_TABLE = """ - CREATE TABLE IF NOT EXISTS `%s` ( - `%s` mediumint unsigned not null auto_increment, - `%s` varchar(250) not null, - `%s` tinyint default 0, - `%s` binary(20) not null, - PRIMARY KEY (`%s`), - UNIQUE KEY `%s` (`%s`) - ) ENGINE=INNODB;""" % ( - SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED, - Database.FIELD_FILE_SHA1, - Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, - ) - - # inserts (ignores duplicates) - INSERT_FINGERPRINT = """ - INSERT IGNORE INTO %s (%s, %s, %s) values - (UNHEX(%%s), %%s, %%s); - """ % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET) - - INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % ( - SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1) - - # selects - SELECT = """ - SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s); - """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH) - - SELECT_MULTIPLE = """ - SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s); - """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET, - FINGERPRINTS_TABLENAME, Database.FIELD_HASH) - - SELECT_ALL = """ - SELECT %s, %s FROM %s; - """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME) - - SELECT_SONG = """ - SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s; - """ % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, 
Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID) - - SELECT_NUM_FINGERPRINTS = """ - SELECT COUNT(*) as n FROM %s - """ % (FINGERPRINTS_TABLENAME) - - SELECT_UNIQUE_SONG_IDS = """ - SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1; - """ % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED) - - SELECT_SONGS = """ - SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1; - """ % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, - SONGS_TABLENAME, FIELD_FINGERPRINTED) - - # drops - DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME - DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME - - # update - UPDATE_SONG_FINGERPRINTED = """ - UPDATE %s SET %s = 1 WHERE %s = %%s - """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID) - - # delete - DELETE_UNFINGERPRINTED = """ - DELETE FROM %s WHERE %s = 0; - """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED) - - def __init__(self, **options): - super(SQLDatabase, self).__init__() - self.cursor = cursor_factory(**options) - self._options = options - - def after_fork(self): - # Clear the cursor cache, we don't want any stale connections from - # the previous process. - Cursor.clear_cache() - - def setup(self): - """ - Creates any non-existing tables required for dejavu to function. - - This also removes all songs that have been added but have no - fingerprints associated with them. - """ - with self.cursor() as cur: - cur.execute(self.CREATE_SONGS_TABLE) - cur.execute(self.CREATE_FINGERPRINTS_TABLE) - cur.execute(self.DELETE_UNFINGERPRINTED) - - def empty(self): - """ - Drops tables created by dejavu and then creates them again - by calling `SQLDatabase.setup`. - - .. 
warning: - This will result in a loss of data - """ - with self.cursor() as cur: - cur.execute(self.DROP_FINGERPRINTS) - cur.execute(self.DROP_SONGS) - - self.setup() - - def delete_unfingerprinted_songs(self): - """ - Removes all songs that have no fingerprints associated with them. - """ - with self.cursor() as cur: - cur.execute(self.DELETE_UNFINGERPRINTED) - - def get_num_songs(self): - """ - Returns number of songs the database has fingerprinted. - """ - with self.cursor() as cur: - cur.execute(self.SELECT_UNIQUE_SONG_IDS) - - for count, in cur: - return count - return 0 - - def get_num_fingerprints(self): - """ - Returns number of fingerprints the database has fingerprinted. - """ - with self.cursor() as cur: - cur.execute(self.SELECT_NUM_FINGERPRINTS) - - for count, in cur: - return count - return 0 - - def set_song_fingerprinted(self, sid): - """ - Set the fingerprinted flag to TRUE (1) once a song has been completely - fingerprinted in the database. - """ - with self.cursor() as cur: - cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,)) - - def get_songs(self): - """ - Return songs that have the fingerprinted flag set TRUE (1). - """ - with self.cursor(cursor_type=DictCursor) as cur: - cur.execute(self.SELECT_SONGS) - for row in cur: - yield row - - def get_song_by_id(self, sid): - """ - Returns song by its ID. - """ - with self.cursor(cursor_type=DictCursor) as cur: - cur.execute(self.SELECT_SONG, (sid,)) - return cur.fetchone() - - def insert(self, hash, sid, offset): - """ - Insert a (sha1, song_id, offset) row into database. - """ - with self.cursor() as cur: - cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset)) - - def insert_song(self, songname, file_hash): - """ - Inserts song in the database and returns the ID of the inserted record. - """ - with self.cursor() as cur: - cur.execute(self.INSERT_SONG, (songname, file_hash)) - return cur.lastrowid - - def query(self, hash): - """ - Return all tuples associated with hash. 
- - If hash is None, returns all entries in the - database (be careful with that one!). - """ - # select all if no key - query = self.SELECT_ALL if hash is None else self.SELECT - - with self.cursor() as cur: - cur.execute(query) - for sid, offset in cur: - yield (sid, offset) - - def get_iterable_kv_pairs(self): - """ - Returns all tuples in database. - """ - return self.query(None) - - def insert_hashes(self, sid, hashes): - """ - Insert series of hash => song_id, offset - values into the database. - """ - values = [] - for hash, offset in hashes: - values.append((hash, sid, offset)) - - with self.cursor() as cur: - for split_values in grouper(values, 1000): - cur.executemany(self.INSERT_FINGERPRINT, split_values) - - def return_matches(self, hashes): - """ - Return the (song_id, offset_diff) tuples associated with - a list of (sha1, sample_offset) values. - """ - # Create a dictionary of hash => offset pairs for later lookups - mapper = {} - for hash, offset in hashes: - mapper[hash.upper()] = offset - - # Get an iteratable of all the hashes we need - values = mapper.keys() - - with self.cursor() as cur: - for split_values in grouper(values, 1000): - # Create our IN part of the query - query = self.SELECT_MULTIPLE - query = query % ', '.join(['UNHEX(%s)'] * len(split_values)) - - cur.execute(query, split_values) - - for hash, sid, offset in cur: - # (sid, db_offset - song_sampled_offset) - yield (sid, offset - mapper[hash]) - - def __getstate__(self): - return (self._options,) - - def __setstate__(self, state): - self._options, = state - self.cursor = cursor_factory(**self._options) + """ + Queries: + + 1) Find duplicates (shouldn't be any, though): + + select `hash`, `song_id`, `offset`, count(*) cnt + from fingerprints + group by `hash`, `song_id`, `offset` + having cnt > 1 + order by cnt asc; + + 2) Get number of hashes by song: + + select song_id, song_name, count(song_id) as num + from fingerprints + natural join songs + group by song_id + order by 
count(song_id) desc; + + 3) get hashes with highest number of collisions + + select + hash, + count(distinct song_id) as n + from fingerprints + group by `hash` + order by n DESC; + + => 26 different songs with same fingerprint (392 times): + + select songs.song_name, fingerprints.offset + from fingerprints natural join songs + where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73"; + """ + + type = "mysql" + + # tables + FINGERPRINTS_TABLENAME = "fingerprints" + SONGS_TABLENAME = "songs" + + # fields + FIELD_FINGERPRINTED = "fingerprinted" + + # creates + CREATE_FINGERPRINTS_TABLE = """ + CREATE TABLE IF NOT EXISTS `%s` ( + `%s` binary(10) not null, + `%s` mediumint unsigned not null, + `%s` int unsigned not null, + INDEX (%s), + UNIQUE KEY `unique_constraint` (%s, %s, %s), + FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE + ) ENGINE=INNODB;""" % ( + FINGERPRINTS_TABLENAME, Database.FIELD_HASH, + Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, + Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, + Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID + ) + + CREATE_SONGS_TABLE = """ + CREATE TABLE IF NOT EXISTS `%s` ( + `%s` mediumint unsigned not null auto_increment, + `%s` varchar(250) not null, + `%s` tinyint default 0, + `%s` binary(20) not null, + PRIMARY KEY (`%s`), + UNIQUE KEY `%s` (`%s`) + ) ENGINE=INNODB;""" % ( + SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED, + Database.FIELD_FILE_SHA1, + Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, + ) + + # inserts (ignores duplicates) + INSERT_FINGERPRINT = """ + INSERT IGNORE INTO %s (%s, %s, %s) values + (UNHEX(%%s), %%s, %%s); + """ % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET) + + INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % ( + SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1) + + # selects + 
SELECT = """ + SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s); + """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH) + + SELECT_MULTIPLE = """ + SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s); + """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET, + FINGERPRINTS_TABLENAME, Database.FIELD_HASH) + + SELECT_ALL = """ + SELECT %s, %s FROM %s; + """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME) + + SELECT_SONG = """ + SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s; + """ % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID) + + SELECT_NUM_FINGERPRINTS = """ + SELECT COUNT(*) as n FROM %s + """ % (FINGERPRINTS_TABLENAME) + + SELECT_UNIQUE_SONG_IDS = """ + SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1; + """ % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED) + + SELECT_SONGS = """ + SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1; + """ % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, + SONGS_TABLENAME, FIELD_FINGERPRINTED) + + # drops + DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME + DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME + + # update + UPDATE_SONG_FINGERPRINTED = """ + UPDATE %s SET %s = 1 WHERE %s = %%s + """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID) + + # delete + DELETE_UNFINGERPRINTED = """ + DELETE FROM %s WHERE %s = 0; + """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED) + + def __init__(self, **options): + super(SQLDatabase, self).__init__() + self.cursor = cursor_factory(**options) + self._options = options + + def after_fork(self): + # Clear the cursor cache, we don't want any stale connections from + # the previous process. + Cursor.clear_cache() + + def setup(self): + """ + Creates any non-existing tables required for dejavu to function. 
+ + This also removes all songs that have been added but have no + fingerprints associated with them. + """ + with self.cursor() as cur: + cur.execute(self.CREATE_SONGS_TABLE) + cur.execute(self.CREATE_FINGERPRINTS_TABLE) + cur.execute(self.DELETE_UNFINGERPRINTED) + + def empty(self): + """ + Drops tables created by dejavu and then creates them again + by calling `SQLDatabase.setup`. + + .. warning: + This will result in a loss of data + """ + with self.cursor() as cur: + cur.execute(self.DROP_FINGERPRINTS) + cur.execute(self.DROP_SONGS) + + self.setup() + + def delete_unfingerprinted_songs(self): + """ + Removes all songs that have no fingerprints associated with them. + """ + with self.cursor() as cur: + cur.execute(self.DELETE_UNFINGERPRINTED) + + def get_num_songs(self): + """ + Returns number of songs the database has fingerprinted. + """ + with self.cursor() as cur: + cur.execute(self.SELECT_UNIQUE_SONG_IDS) + + for count, in cur: + return count + return 0 + + def get_num_fingerprints(self): + """ + Returns number of fingerprints the database has fingerprinted. + """ + with self.cursor() as cur: + cur.execute(self.SELECT_NUM_FINGERPRINTS) + + for count, in cur: + return count + return 0 + + def set_song_fingerprinted(self, sid): + """ + Set the fingerprinted flag to TRUE (1) once a song has been completely + fingerprinted in the database. + """ + with self.cursor() as cur: + cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,)) + + def get_songs(self): + """ + Return songs that have the fingerprinted flag set TRUE (1). + """ + with self.cursor(cursor_class=cursor.MySQLCursorDict) as cur: + cur.execute(self.SELECT_SONGS) + for row in cur: + yield row + + def get_song_by_id(self, sid): + """ + Returns song by its ID. + """ + with self.cursor(cursor_class=cursor.MySQLCursorDict) as cur: + cur.execute(self.SELECT_SONG, (sid,)) + return cur.fetchone() + + def insert(self, hash, sid, offset): + """ + Insert a (sha1, song_id, offset) row into database. 
+ """ + with self.cursor() as cur: + cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset)) + + def insert_song(self, songname, file_hash): + """ + Inserts song in the database and returns the ID of the inserted record. + """ + with self.cursor() as cur: + cur.execute(self.INSERT_SONG, (songname, file_hash)) + return cur.lastrowid + + def query(self, hash): + """ + Return all tuples associated with hash. + + If hash is None, returns all entries in the + database (be careful with that one!). + """ + # select all if no key + query = self.SELECT_ALL if hash is None else self.SELECT + + with self.cursor() as cur: + cur.execute(query) + for sid, offset in cur: + yield (sid, offset) + + def get_iterable_kv_pairs(self): + """ + Returns all tuples in database. + """ + return self.query(None) + + def insert_hashes(self, sid, hashes): + """ + Insert series of hash => song_id, offset + values into the database. + """ + values = [] + for hashit, offset in hashes: + values.append((hashit, int(sid), int(offset))) + + with self.cursor() as cur: + for split_values in grouper(values, 1000): + lst = list(split_values) + #print('split_vals', lst) + cur.executemany(self.INSERT_FINGERPRINT, lst) + + def return_matches(self, hashes): + """ + Return the (song_id, offset_diff) tuples associated with + a list of (sha1, sample_offset) values. 
+ """ + # Create a dictionary of hash => offset pairs for later lookups + mapper = {} + for hashit, offset in hashes: + mapper[hashit.upper()] = offset + + # Get an iteratable of all the hashes we need + values = mapper.keys() + + with self.cursor() as cur: + for split_values in grouper(values, 1000): + # Create our IN part of the query + query = self.SELECT_MULTIPLE + lstvals = list(split_values) + query = query % ', '.join(['UNHEX(%s)'] * len(lstvals)) + + cur.execute(query, lstvals) + + for hashit, sid, offset in cur: + # (sid, db_offset - song_sampled_offset) + yield (sid, offset - mapper[hashit]) + + def __getstate__(self): + return (self._options,) + + def __setstate__(self, state): + self._options, = state + self.cursor = cursor_factory(**self._options) def grouper(iterable, n, fillvalue=None): - args = [iter(iterable)] * n - return (filter(None, values) for values - in izip_longest(fillvalue=fillvalue, *args)) + args = [iter(iterable)] * n + return (filter(None, values) for values + in zip_longest(*args, fillvalue=fillvalue)) def cursor_factory(**factory_options): - def cursor(**options): - options.update(factory_options) - return Cursor(**options) - return cursor - - -class Cursor(object): - """ - Establishes a connection to the database and returns an open cursor. - - - ```python - # Use as context manager - with Cursor() as cur: - cur.execute(query) - ``` - """ - _cache = Queue.Queue(maxsize=5) - - def __init__(self, cursor_type=mysql.cursors.Cursor, **options): - super(Cursor, self).__init__() - - try: - conn = self._cache.get_nowait() - except Queue.Empty: - conn = mysql.connect(**options) - else: - # Ping the connection before using it from the cache. 
- conn.ping(True) - - self.conn = conn - self.conn.autocommit(False) - self.cursor_type = cursor_type - - @classmethod - def clear_cache(cls): - cls._cache = Queue.Queue(maxsize=5) - - def __enter__(self): - self.cursor = self.conn.cursor(self.cursor_type) - return self.cursor - - def __exit__(self, extype, exvalue, traceback): - # if we had a MySQL related error we try to rollback the cursor. - if extype is mysql.MySQLError: - self.cursor.rollback() - - self.cursor.close() - self.conn.commit() - - # Put it back on the queue - try: - self._cache.put_nowait(self.conn) - except Queue.Full: - self.conn.close() + def cursor(**options): + options.update(factory_options) + return Cursor(**options) + return cursor + + +class Cursor(): + """ + Establishes a connection to the database and returns an open cursor. + + + ```python + # Use as context manager + with Cursor() as cur: + cur.execute(query) + ``` + """ + _cache = queue.Queue(maxsize=5) + + def __init__(self, cursor_class=cursor.MySQLCursor, **options): + super(Cursor, self).__init__() + + try: + conn = self._cache.get_nowait() + except queue.Empty: + conn = connection.MySQLConnection(**options) + else: + # Ping the connection before using it from the cache. + conn.ping(True) + + self.conn = conn + #self.conn.autocommit(False) + if options is not None and 'cursor_class' in options: + cursor_class = options['cursor_class'] + self.cursor_class = cursor_class + + @classmethod + def clear_cache(cls): + cls._cache = queue.Queue(maxsize=5) + + def __enter__(self): + self.cursor = self.conn.cursor(cursor_class=self.cursor_class) + return self.cursor + + def __exit__(self, extype, exvalue, traceback): + # if we had a MySQL related error we try to rollback the cursor. 
+        if extype is not None:
+            self.conn.rollback()
+
+        self.cursor.close()
+        self.conn.commit()
+
+        # Put it back on the queue
+        try:
+            self._cache.put_nowait(self.conn)
+        except queue.Full:
+            self.conn.close()
diff --git a/dejavu/decoder.py b/dejavu/decoder.py
index 04aa39f4..b5144f57 100755
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@@ -56,7 +56,7 @@ def read(filename, limit=None):
     data = np.fromstring(audiofile._data, np.int16)
 
     channels = []
-    for chn in xrange(audiofile.channels):
+    for chn in range(audiofile.channels):
         channels.append(data[chn::audiofile.channels])
 
     fs = audiofile.frame_rate
diff --git a/dejavu/fingerprint.py b/dejavu/fingerprint.py
index 4db321b5..fc0f22ba 100755
--- a/dejavu/fingerprint.py
+++ b/dejavu/fingerprint.py
@@ -3,7 +3,7 @@
 import matplotlib.pyplot as plt
 from scipy.ndimage.filters import maximum_filter
 from scipy.ndimage.morphology import (generate_binary_structure,
-                                      iterate_structure, binary_erosion)
+    iterate_structure, binary_erosion)
 import hashlib
 from operator import itemgetter
 
@@ -62,94 +62,96 @@ FINGERPRINT_REDUCTION = 20
 
 
 def fingerprint(channel_samples, Fs=DEFAULT_FS,
-                wsize=DEFAULT_WINDOW_SIZE,
-                wratio=DEFAULT_OVERLAP_RATIO,
-                fan_value=DEFAULT_FAN_VALUE,
-                amp_min=DEFAULT_AMP_MIN):
-    """
-    FFT the channel, log transform output, find local maxima, then return
-    locally sensitive hashes.
- """ - # FFT the signal and extract frequency components - arr2D = mlab.specgram( - channel_samples, - NFFT=wsize, - Fs=Fs, - window=mlab.window_hanning, - noverlap=int(wsize * wratio))[0] - - # apply log transform since specgram() returns linear array - arr2D = 10 * np.log10(arr2D) - arr2D[arr2D == -np.inf] = 0 # replace infs with zeros - - # find local maxima - local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min) - - # return hashes - return generate_hashes(local_maxima, fan_value=fan_value) + wsize=DEFAULT_WINDOW_SIZE, + wratio=DEFAULT_OVERLAP_RATIO, + fan_value=DEFAULT_FAN_VALUE, + amp_min=DEFAULT_AMP_MIN): + """ + FFT the channel, log transform output, find local maxima, then return + locally sensitive hashes. + """ + # FFT the signal and extract frequency components + arr2D = mlab.specgram( + channel_samples, + NFFT=wsize, + Fs=Fs, + window=mlab.window_hanning, + noverlap=int(wsize * wratio))[0] + + # apply log transform since specgram() returns linear array + arr2D = 10 * np.log10(arr2D) + arr2D[arr2D == -np.inf] = 0 # replace infs with zeros + + # find local maxima + local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min) + + # return hashes + return generate_hashes(local_maxima, fan_value=fan_value) def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN): - # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure - struct = generate_binary_structure(2, 1) - neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE) - - # find local maxima using our fliter shape - local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D - background = (arr2D == 0) - eroded_background = binary_erosion(background, structure=neighborhood, - border_value=1) - - # Boolean mask of arr2D with True at peaks - detected_peaks = local_max - eroded_background - - # extract peaks - amps = arr2D[detected_peaks] - j, i = np.where(detected_peaks) - - # filter 
peaks - amps = amps.flatten() - peaks = zip(i, j, amps) - peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp - - # get indices for frequency and time - frequency_idx = [x[1] for x in peaks_filtered] - time_idx = [x[0] for x in peaks_filtered] - - if plot: - # scatter of the peaks - fig, ax = plt.subplots() - ax.imshow(arr2D) - ax.scatter(time_idx, frequency_idx) - ax.set_xlabel('Time') - ax.set_ylabel('Frequency') - ax.set_title("Spectrogram") - plt.gca().invert_yaxis() - plt.show() - - return zip(frequency_idx, time_idx) + # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure + struct = generate_binary_structure(2, 1) + neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE) + + # find local maxima using our fliter shape + local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D + background = (arr2D == 0) + eroded_background = binary_erosion(background, structure=neighborhood, + border_value=1) + + # Boolean mask of arr2D with True at peaks + detected_peaks = local_max - eroded_background + + # extract peaks + amps = arr2D[detected_peaks] + j, i = np.where(detected_peaks) + + # filter peaks + amps = amps.flatten() + peaks = zip(i, j, amps) + peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp + + # get indices for frequency and time + frequency_idx = [x[1] for x in peaks_filtered] + time_idx = [x[0] for x in peaks_filtered] + + if plot: + # scatter of the peaks + fig, ax = plt.subplots() + ax.imshow(arr2D) + ax.scatter(time_idx, frequency_idx) + ax.set_xlabel('Time') + ax.set_ylabel('Frequency') + ax.set_title("Spectrogram") + plt.gca().invert_yaxis() + plt.show() + + return zip(frequency_idx, time_idx) def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE): - """ - Hash list structure: - sha1_hash[0:20] time_offset - [(e05b341a9b77a51fd26, 32), ... 
] - """ - if PEAK_SORT: - peaks.sort(key=itemgetter(1)) - - for i in range(len(peaks)): - for j in range(1, fan_value): - if (i + j) < len(peaks): - - freq1 = peaks[i][IDX_FREQ_I] - freq2 = peaks[i + j][IDX_FREQ_I] - t1 = peaks[i][IDX_TIME_J] - t2 = peaks[i + j][IDX_TIME_J] - t_delta = t2 - t1 - - if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA: - h = hashlib.sha1( - "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta))) - yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1) + """ + Hash list structure: + sha1_hash[0:20] time_offset + [(e05b341a9b77a51fd26, 32), ... ] + """ + if PEAK_SORT: + peaks = sorted(peaks, key=itemgetter(1)) + + lenPeaks = len(peaks) + #print("lenPeaks", lenPeaks) + for i in range(lenPeaks): + for j in range(1, fan_value): + if (i + j) < lenPeaks: + + freq1 = peaks[i][IDX_FREQ_I] + freq2 = peaks[i + j][IDX_FREQ_I] + t1 = peaks[i][IDX_TIME_J] + t2 = peaks[i + j][IDX_TIME_J] + t_delta = t2 - t1 + + if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA: + h = hashlib.sha1( + "{}|{}|{}".format(str(freq1), str(freq2), str(t_delta)).encode('utf-8')) + yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1) diff --git a/dejavu/recognize.py b/dejavu/recognize.py index b43a8791..d546aa01 100755 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -4,109 +4,109 @@ import pyaudio import time +class BaseRecognizer(): -class BaseRecognizer(object): + def __init__(self, dejavu): + self.dejavu = dejavu + self.Fs = fingerprint.DEFAULT_FS - def __init__(self, dejavu): - self.dejavu = dejavu - self.Fs = fingerprint.DEFAULT_FS + def _recognize(self, *data): + matches = [] + for d in data: + matches.extend(self.dejavu.find_matches(d, Fs=self.Fs)) - def _recognize(self, *data): - matches = [] - for d in data: - matches.extend(self.dejavu.find_matches(d, Fs=self.Fs)) - return self.dejavu.align_matches(matches) + return self.dejavu.align_matches(matches) - def recognize(self): - pass # base class does nothing + def recognize(self): + 
pass # base class does nothing class FileRecognizer(BaseRecognizer): - def __init__(self, dejavu): - super(FileRecognizer, self).__init__(dejavu) + def __init__(self, dejavu): + super(FileRecognizer, self).__init__(dejavu) - def recognize_file(self, filename): - frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit) + def recognize_file(self, filename): + frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit) - t = time.time() - match = self._recognize(*frames) - t = time.time() - t + t = time.time() + match = self._recognize(*frames) + t = time.time() - t - if match: - match['match_time'] = t + if match: + match['match_time'] = t - return match + return match - def recognize(self, filename): - return self.recognize_file(filename) + def recognize(self, filename): + return self.recognize_file(filename) class MicrophoneRecognizer(BaseRecognizer): - default_chunksize = 8192 - default_format = pyaudio.paInt16 - default_channels = 2 - default_samplerate = 44100 - - def __init__(self, dejavu): - super(MicrophoneRecognizer, self).__init__(dejavu) - self.audio = pyaudio.PyAudio() - self.stream = None - self.data = [] - self.channels = MicrophoneRecognizer.default_channels - self.chunksize = MicrophoneRecognizer.default_chunksize - self.samplerate = MicrophoneRecognizer.default_samplerate - self.recorded = False - - def start_recording(self, channels=default_channels, - samplerate=default_samplerate, - chunksize=default_chunksize): - self.chunksize = chunksize - self.channels = channels - self.recorded = False - self.samplerate = samplerate - - if self.stream: - self.stream.stop_stream() - self.stream.close() - - self.stream = self.audio.open( - format=self.default_format, - channels=channels, - rate=samplerate, - input=True, - frames_per_buffer=chunksize, - ) - - self.data = [[] for i in range(channels)] - - def process_recording(self): - data = self.stream.read(self.chunksize) - nums = np.fromstring(data, np.int16) - for c in 
range(self.channels): - self.data[c].extend(nums[c::self.channels]) - - def stop_recording(self): - self.stream.stop_stream() - self.stream.close() - self.stream = None - self.recorded = True - - def recognize_recording(self): - if not self.recorded: - raise NoRecordingError("Recording was not complete/begun") - return self._recognize(*self.data) - - def get_recorded_time(self): - return len(self.data[0]) / self.rate - - def recognize(self, seconds=10): - self.start_recording() - for i in range(0, int(self.samplerate / self.chunksize - * seconds)): - self.process_recording() - self.stop_recording() - return self.recognize_recording() + default_chunksize = 8192 + default_format = pyaudio.paInt16 + default_channels = 2 + default_samplerate = 44100 + + def __init__(self, dejavu): + super(MicrophoneRecognizer, self).__init__(dejavu) + self.audio = pyaudio.PyAudio() + self.stream = None + self.data = [] + self.channels = MicrophoneRecognizer.default_channels + self.chunksize = MicrophoneRecognizer.default_chunksize + self.samplerate = MicrophoneRecognizer.default_samplerate + self.recorded = False + + def start_recording(self, channels=default_channels, + samplerate=default_samplerate, + chunksize=default_chunksize): + self.chunksize = chunksize + self.channels = channels + self.recorded = False + self.samplerate = samplerate + + if self.stream: + self.stream.stop_stream() + self.stream.close() + + self.stream = self.audio.open( + format=self.default_format, + channels=channels, + rate=samplerate, + input=True, + frames_per_buffer=chunksize, + ) + + self.data = [[] for i in range(channels)] + + def process_recording(self): + data = self.stream.read(self.chunksize) + nums = np.fromstring(data, np.int16) + for c in range(self.channels): + self.data[c].extend(nums[c::self.channels]) + + def stop_recording(self): + self.stream.stop_stream() + self.stream.close() + self.stream = None + self.recorded = True + + def recognize_recording(self): + if not self.recorded: + raise 
NoRecordingError("Recording was not complete/begun")
+        return self._recognize(*self.data)
+
+    def get_recorded_time(self):
+        return len(self.data[0]) / self.samplerate
+
+    def recognize(self, seconds=10):
+        self.start_recording()
+        for i in range(0, int(self.samplerate / self.chunksize
+                              * seconds)):
+            self.process_recording()
+        self.stop_recording()
+        return self.recognize_recording()
 
 
 class NoRecordingError(Exception):
-    pass
+    pass

From 162c1f27715d019ea7bcf80e9705a09ba1678fda Mon Sep 17 00:00:00 2001
From: David Dietrich
Date: Sat, 3 Jun 2017 12:11:45 -0700
Subject: [PATCH 2/4] Upgraded to Python 3.

---
 example.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/example.py b/example.py
index 1c99e69c..b9023dd2 100755
--- a/example.py
+++ b/example.py
@@ -6,7 +6,7 @@
 from dejavu.recognize import FileRecognizer, MicrophoneRecognizer
 
 # load config from a JSON file (or anything outputting a python dictionary)
-with open("dejavu.cnf.SAMPLE") as f:
+with open("dejavu.cnf") as f:
     config = json.load(f)
 
 if __name__ == '__main__':
@@ -19,17 +19,17 @@
 
     # Recognize audio from a file
     song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
-    print "From file we recognized: %s\n" % song
+    print("From file we recognized: {}\n".format(song))
 
     # Or recognize audio from your microphone for `secs` seconds
-    secs = 5
-    song = djv.recognize(MicrophoneRecognizer, seconds=secs)
-    if song is None:
-        print "Nothing recognized -- did you play the song out loud so your mic could hear it? :)"
-    else:
-        print "From mic with %d seconds we recognized: %s\n" % (secs, song)
+#    secs = 5
+#    song = djv.recognize(MicrophoneRecognizer, seconds=secs)
+#    if song is None:
+#        print("Nothing recognized -- did you play the song out loud so your mic could hear it? 
:)") +# else: +# print("From mic with %d seconds we recognized: %s\n" % (secs, song)) # Or use a recognizer without the shortcut, in anyway you would like - recognizer = FileRecognizer(djv) - song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3") - print "No shortcut, we recognized: %s\n" % song \ No newline at end of file +# recognizer = FileRecognizer(djv) +# song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3") +# print("No shortcut, we recognized: {}\n".format(song)) \ No newline at end of file From 9047441de9b32979944e3ea058765e4db9d31e97 Mon Sep 17 00:00:00 2001 From: David Dietrich Date: Sun, 4 Jun 2017 04:11:35 -0700 Subject: [PATCH 3/4] grouper now returning a list instead of a filter() iterator. --- dejavu/database_sql.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dejavu/database_sql.py b/dejavu/database_sql.py index 2bd5adc0..85321d40 100755 --- a/dejavu/database_sql.py +++ b/dejavu/database_sql.py @@ -274,9 +274,8 @@ def insert_hashes(self, sid, hashes): with self.cursor() as cur: for split_values in grouper(values, 1000): - lst = list(split_values) - #print('split_vals', lst) - cur.executemany(self.INSERT_FINGERPRINT, lst) + #print('split_vals', split_values) + cur.executemany(self.INSERT_FINGERPRINT, split_values) def return_matches(self, hashes): """ @@ -295,10 +294,9 @@ def return_matches(self, hashes): for split_values in grouper(values, 1000): # Create our IN part of the query query = self.SELECT_MULTIPLE - lstvals = list(split_values) - query = query % ', '.join(['UNHEX(%s)'] * len(lstvals)) + query = query % ', '.join(['UNHEX(%s)'] * len(split_values)) - cur.execute(query, lstvals) + cur.execute(query, split_values) for hashit, sid, offset in cur: # (sid, db_offset - song_sampled_offset) @@ -314,8 +312,7 @@ def __setstate__(self, state): def grouper(iterable, n, fillvalue=None): args = [iter(iterable)] * n - return 
(filter(None, values) for values - in zip_longest(*args, fillvalue=fillvalue)) + return (list(filter(None, values)) for values in zip_longest(*args, fillvalue=fillvalue)) def cursor_factory(**factory_options): From b4f10e15fdf2b11c53800d7e55bb8d9f044c57dd Mon Sep 17 00:00:00 2001 From: David Dietrich Date: Sun, 4 Jun 2017 05:01:49 -0700 Subject: [PATCH 4/4] Updated to v0.2.0 and added maintainer email for this fork. --- requirements.txt | 1 + setup.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9478f734..c9df2f70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ PyAudio>=0.2.7 numpy>=1.8.2 scipy>=0.12.1 matplotlib>=1.3.1 +mysql-connector-python>=1.1 ### END ### diff --git a/setup.py b/setup.py index 8312d1d5..72fc5e8e 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def parse_requirements(requirements): return reqs PACKAGE_NAME = "PyDejavu" -PACKAGE_VERSION = "0.1.3" +PACKAGE_VERSION = "0.2.0" SUMMARY = 'Dejavu: Audio Fingerprinting in Python' DESCRIPTION = """ Audio fingerprinting and recognition algorithm implemented in Python @@ -40,9 +40,9 @@ def parse_requirements(requirements): long_description=DESCRIPTION, author='Will Drevo', author_email='will.drevo@gmail.com', - maintainer="Will Drevo", - maintainer_email="will.drevo@gmail.com", - url='http://github.com/tuxdna/dejavu', + maintainer="David Dietrich", + maintainer_email='"David Dietrich" ', + url='http://github.com/grayarrow/dejavu', license='MIT License', include_package_data=True, packages=find_packages(),