From 34500d102b4b7f21f4fed596bed8c0b88c2f9e34 Mon Sep 17 00:00:00 2001 From: David Dietrich Date: Sat, 3 Jun 2017 12:11:24 -0700 Subject: [PATCH 1/4] Upgraded to Python 3. --- dejavu.py | 2 +- dejavu/__init__.py | 380 ++++++++++----------- dejavu/database.py | 3 +- dejavu/database_sql.py | 725 +++++++++++++++++++++-------------------- dejavu/decoder.py | 2 +- dejavu/fingerprint.py | 174 +++++----- dejavu/recognize.py | 176 +++++----- 7 files changed, 735 insertions(+), 727 deletions(-) diff --git a/dejavu.py b/dejavu.py index 0bb4d0c4..ee792f53 100755 --- a/dejavu.py +++ b/dejavu.py @@ -13,7 +13,7 @@ warnings.filterwarnings("ignore") -DEFAULT_CONFIG_FILE = "dejavu.cnf.SAMPLE" +DEFAULT_CONFIG_FILE = "dejavu.cnf" def init(configpath): diff --git a/dejavu/__init__.py b/dejavu/__init__.py index 4f6e6e8a..1321084e 100755 --- a/dejavu/__init__.py +++ b/dejavu/__init__.py @@ -1,202 +1,204 @@ from dejavu.database import get_database, Database import dejavu.decoder as decoder -import fingerprint +import dejavu.fingerprint as fingerprint import multiprocessing import os import traceback import sys - -class Dejavu(object): - - SONG_ID = "song_id" - SONG_NAME = 'song_name' - CONFIDENCE = 'confidence' - MATCH_TIME = 'match_time' - OFFSET = 'offset' - OFFSET_SECS = 'offset_seconds' - - def __init__(self, config): - super(Dejavu, self).__init__() - - self.config = config - - # initialize db - db_cls = get_database(config.get("database_type", None)) - - self.db = db_cls(**config.get("database", {})) - self.db.setup() - - # if we should limit seconds fingerprinted, - # None|-1 means use entire track - self.limit = self.config.get("fingerprint_limit", None) - if self.limit == -1: # for JSON compatibility - self.limit = None - self.get_fingerprinted_songs() - - def get_fingerprinted_songs(self): - # get songs previously indexed - self.songs = self.db.get_songs() - self.songhashes_set = set() # to know which ones we've computed before - for song in self.songs: - song_hash = 
song[Database.FIELD_FILE_SHA1] - self.songhashes_set.add(song_hash) - - def fingerprint_directory(self, path, extensions, nprocesses=None): - # Try to use the maximum amount of processes if not given. - try: - nprocesses = nprocesses or multiprocessing.cpu_count() - except NotImplementedError: - nprocesses = 1 - else: - nprocesses = 1 if nprocesses <= 0 else nprocesses - - pool = multiprocessing.Pool(nprocesses) - - filenames_to_fingerprint = [] - for filename, _ in decoder.find_files(path, extensions): - - # don't refingerprint already fingerprinted files - if decoder.unique_hash(filename) in self.songhashes_set: - print "%s already fingerprinted, continuing..." % filename - continue - - filenames_to_fingerprint.append(filename) - - # Prepare _fingerprint_worker input - worker_input = zip(filenames_to_fingerprint, - [self.limit] * len(filenames_to_fingerprint)) - - # Send off our tasks - iterator = pool.imap_unordered(_fingerprint_worker, - worker_input) - - # Loop till we have all of them - while True: - try: - song_name, hashes, file_hash = iterator.next() - except multiprocessing.TimeoutError: - continue - except StopIteration: - break - except: - print("Failed fingerprinting") - # Print traceback because we can't reraise it here - traceback.print_exc(file=sys.stdout) - else: - sid = self.db.insert_song(song_name, file_hash) - - self.db.insert_hashes(sid, hashes) - self.db.set_song_fingerprinted(sid) - self.get_fingerprinted_songs() - - pool.close() - pool.join() - - def fingerprint_file(self, filepath, song_name=None): - songname = decoder.path_to_songname(filepath) - song_hash = decoder.unique_hash(filepath) - song_name = song_name or songname - # don't refingerprint already fingerprinted files - if song_hash in self.songhashes_set: - print "%s already fingerprinted, continuing..." 
% song_name - else: - song_name, hashes, file_hash = _fingerprint_worker( - filepath, - self.limit, - song_name=song_name - ) - sid = self.db.insert_song(song_name, file_hash) - - self.db.insert_hashes(sid, hashes) - self.db.set_song_fingerprinted(sid) - self.get_fingerprinted_songs() - - def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS): - hashes = fingerprint.fingerprint(samples, Fs=Fs) - return self.db.return_matches(hashes) - - def align_matches(self, matches): - """ - Finds hash matches that align in time with other matches and finds - consensus about which hashes are "true" signal from the audio. - - Returns a dictionary with match information. - """ - # align by diffs - diff_counter = {} - largest = 0 - largest_count = 0 - song_id = -1 - for tup in matches: - sid, diff = tup - if diff not in diff_counter: - diff_counter[diff] = {} - if sid not in diff_counter[diff]: - diff_counter[diff][sid] = 0 - diff_counter[diff][sid] += 1 - - if diff_counter[diff][sid] > largest_count: - largest = diff - largest_count = diff_counter[diff][sid] - song_id = sid - - # extract idenfication - song = self.db.get_song_by_id(song_id) - if song: - # TODO: Clarify what `get_song_by_id` should return. 
- songname = song.get(Dejavu.SONG_NAME, None) - else: - return None - - # return match info - nseconds = round(float(largest) / fingerprint.DEFAULT_FS * - fingerprint.DEFAULT_WINDOW_SIZE * - fingerprint.DEFAULT_OVERLAP_RATIO, 5) - song = { - Dejavu.SONG_ID : song_id, - Dejavu.SONG_NAME : songname, - Dejavu.CONFIDENCE : largest_count, - Dejavu.OFFSET : int(largest), - Dejavu.OFFSET_SECS : nseconds, - Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None),} - return song - - def recognize(self, recognizer, *options, **kwoptions): - r = recognizer(self) - return r.recognize(*options, **kwoptions) +class Dejavu(): + + SONG_ID = "song_id" + SONG_NAME = 'song_name' + CONFIDENCE = 'confidence' + MATCH_TIME = 'match_time' + OFFSET = 'offset' + OFFSET_SECS = 'offset_seconds' + + def __init__(self, config): + super(Dejavu, self).__init__() + + self.config = config + + # initialize db + db_cls = get_database(config.get("database_type", None)) + + self.db = db_cls(**config.get("database", {})) + self.db.setup() + + # if we should limit seconds fingerprinted, + # None|-1 means use entire track + self.limit = self.config.get("fingerprint_limit", None) + if self.limit == -1: # for JSON compatibility + self.limit = None + self.get_fingerprinted_songs() + + def get_fingerprinted_songs(self): + # get songs previously indexed + self.songs = self.db.get_songs() + self.songhashes_set = set() # to know which ones we've computed before + + for song in self.songs: + #print('song', song) + song_hash = song[Database.FIELD_FILE_SHA1] + self.songhashes_set.add(song_hash) + + def fingerprint_directory(self, path, extensions, nprocesses=None): + # Try to use the maximum amount of processes if not given. 
+ try: + nprocesses = nprocesses or multiprocessing.cpu_count() + except NotImplementedError: + nprocesses = 1 + else: + nprocesses = 1 if nprocesses <= 0 else nprocesses + + pool = multiprocessing.Pool(nprocesses) + + filenames_to_fingerprint = [] + for filename, _ in decoder.find_files(path, extensions): + + # don't refingerprint already fingerprinted files + if decoder.unique_hash(filename) in self.songhashes_set: + print("%s already fingerprinted, continuing..." % filename) + continue + + filenames_to_fingerprint.append(filename) + + # Prepare _fingerprint_worker input + worker_input = zip(filenames_to_fingerprint, + [self.limit] * len(filenames_to_fingerprint)) + + # Send off our tasks + iterator = pool.imap_unordered(_fingerprint_worker, + worker_input) + + # Loop till we have all of them + while True: + try: + song_name, hashes, file_hash = iterator.next() + #print('hashes', hashes) + except multiprocessing.TimeoutError: + continue + except StopIteration: + break + except: + print("Failed fingerprinting") + # Print traceback because we can't reraise it here + traceback.print_exc(file=sys.stdout) + else: + sid = self.db.insert_song(song_name, file_hash) + + self.db.insert_hashes(sid, hashes) + self.db.set_song_fingerprinted(sid) + self.get_fingerprinted_songs() + + pool.close() + pool.join() + + def fingerprint_file(self, filepath, song_name=None): + songname = decoder.path_to_songname(filepath) + song_hash = decoder.unique_hash(filepath) + song_name = song_name or songname + # don't refingerprint already fingerprinted files + if song_hash in self.songhashes_set: + print("%s already fingerprinted, continuing..." 
% song_name) + else: + song_name, hashes, file_hash = _fingerprint_worker( + filepath, + self.limit, + song_name=song_name + ) + sid = self.db.insert_song(song_name, file_hash) + + self.db.insert_hashes(sid, hashes) + self.db.set_song_fingerprinted(sid) + self.get_fingerprinted_songs() + + def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS): + hashes = fingerprint.fingerprint(samples, Fs=Fs) + return self.db.return_matches(hashes) + + def align_matches(self, matches): + """ + Finds hash matches that align in time with other matches and finds + consensus about which hashes are "true" signal from the audio. + + Returns a dictionary with match information. + """ + # align by diffs + diff_counter = {} + largest = 0 + largest_count = 0 + song_id = -1 + for tup in matches: + sid, diff = tup + if diff not in diff_counter: + diff_counter[diff] = {} + if sid not in diff_counter[diff]: + diff_counter[diff][sid] = 0 + diff_counter[diff][sid] += 1 + + if diff_counter[diff][sid] > largest_count: + largest = diff + largest_count = diff_counter[diff][sid] + song_id = sid + + # extract idenfication + song = self.db.get_song_by_id(song_id) + if song: + # TODO: Clarify what `get_song_by_id` should return. + songname = song.get(Dejavu.SONG_NAME, None) + else: + return None + + # return match info + nseconds = round(float(largest) / fingerprint.DEFAULT_FS * + fingerprint.DEFAULT_WINDOW_SIZE * + fingerprint.DEFAULT_OVERLAP_RATIO, 5) + song = { + Dejavu.SONG_ID : song_id, + Dejavu.SONG_NAME : songname, + Dejavu.CONFIDENCE : largest_count, + Dejavu.OFFSET : int(largest), + Dejavu.OFFSET_SECS : nseconds, + Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None),} + return song + + def recognize(self, recognizer, *options, **kwoptions): + r = recognizer(self) + return r.recognize(*options, **kwoptions) def _fingerprint_worker(filename, limit=None, song_name=None): - # Pool.imap sends arguments as tuples so we have to unpack - # them ourself. 
- try: - filename, limit = filename - except ValueError: - pass - - songname, extension = os.path.splitext(os.path.basename(filename)) - song_name = song_name or songname - channels, Fs, file_hash = decoder.read(filename, limit) - result = set() - channel_amount = len(channels) - - for channeln, channel in enumerate(channels): - # TODO: Remove prints or change them into optional logging. - print("Fingerprinting channel %d/%d for %s" % (channeln + 1, - channel_amount, - filename)) - hashes = fingerprint.fingerprint(channel, Fs=Fs) - print("Finished channel %d/%d for %s" % (channeln + 1, channel_amount, - filename)) - result |= set(hashes) - - return song_name, result, file_hash + # Pool.imap sends arguments as tuples so we have to unpack + # them ourself. + try: + filename, limit = filename + except ValueError: + pass + + songname, extension = os.path.splitext(os.path.basename(filename)) + song_name = song_name or songname + channels, Fs, file_hash = decoder.read(filename, limit) + result = set() + channel_amount = len(channels) + + for channeln, channel in enumerate(channels): + # TODO: Remove prints or change them into optional logging. + print("Fingerprinting channel %d/%d for %s" % (channeln + 1, + channel_amount, + filename)) + hashes = fingerprint.fingerprint(channel, Fs=Fs) + print("Finished channel %d/%d for %s" % (channeln + 1, channel_amount, + filename)) + result |= set(hashes) + + return song_name, result, file_hash def chunkify(lst, n): - """ - Splits a list into roughly n equal parts. - http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts - """ - return [lst[i::n] for i in xrange(n)] + """ + Splits a list into roughly n equal parts. 
+ http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts + """ + return [lst[i::n] for i in xrange(n)] diff --git a/dejavu/database.py b/dejavu/database.py index e5732ff0..a7410d4b 100755 --- a/dejavu/database.py +++ b/dejavu/database.py @@ -1,8 +1,7 @@ from __future__ import absolute_import import abc - -class Database(object): +class Database(): __metaclass__ = abc.ABCMeta FIELD_FILE_SHA1 = 'file_sha1' diff --git a/dejavu/database_sql.py b/dejavu/database_sql.py index 031bdcb4..2bd5adc0 100755 --- a/dejavu/database_sql.py +++ b/dejavu/database_sql.py @@ -1,373 +1,378 @@ from __future__ import absolute_import -from itertools import izip_longest -import Queue +from itertools import zip_longest +import queue -import MySQLdb as mysql -from MySQLdb.cursors import DictCursor +from mysql.connector import (connection) +from mysql.connector import (cursor) from dejavu.database import Database class SQLDatabase(Database): - """ - Queries: - - 1) Find duplicates (shouldn't be any, though): - - select `hash`, `song_id`, `offset`, count(*) cnt - from fingerprints - group by `hash`, `song_id`, `offset` - having cnt > 1 - order by cnt asc; - - 2) Get number of hashes by song: - - select song_id, song_name, count(song_id) as num - from fingerprints - natural join songs - group by song_id - order by count(song_id) desc; - - 3) get hashes with highest number of collisions - - select - hash, - count(distinct song_id) as n - from fingerprints - group by `hash` - order by n DESC; - - => 26 different songs with same fingerprint (392 times): - - select songs.song_name, fingerprints.offset - from fingerprints natural join songs - where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73"; - """ - - type = "mysql" - - # tables - FINGERPRINTS_TABLENAME = "fingerprints" - SONGS_TABLENAME = "songs" - - # fields - FIELD_FINGERPRINTED = "fingerprinted" - - # creates - CREATE_FINGERPRINTS_TABLE = """ - CREATE TABLE IF NOT EXISTS 
`%s` ( - `%s` binary(10) not null, - `%s` mediumint unsigned not null, - `%s` int unsigned not null, - INDEX (%s), - UNIQUE KEY `unique_constraint` (%s, %s, %s), - FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE - ) ENGINE=INNODB;""" % ( - FINGERPRINTS_TABLENAME, Database.FIELD_HASH, - Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, - Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, - Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID - ) - - CREATE_SONGS_TABLE = """ - CREATE TABLE IF NOT EXISTS `%s` ( - `%s` mediumint unsigned not null auto_increment, - `%s` varchar(250) not null, - `%s` tinyint default 0, - `%s` binary(20) not null, - PRIMARY KEY (`%s`), - UNIQUE KEY `%s` (`%s`) - ) ENGINE=INNODB;""" % ( - SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED, - Database.FIELD_FILE_SHA1, - Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, - ) - - # inserts (ignores duplicates) - INSERT_FINGERPRINT = """ - INSERT IGNORE INTO %s (%s, %s, %s) values - (UNHEX(%%s), %%s, %%s); - """ % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET) - - INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % ( - SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1) - - # selects - SELECT = """ - SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s); - """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH) - - SELECT_MULTIPLE = """ - SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s); - """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET, - FINGERPRINTS_TABLENAME, Database.FIELD_HASH) - - SELECT_ALL = """ - SELECT %s, %s FROM %s; - """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME) - - SELECT_SONG = """ - SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s; - """ % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, 
Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID) - - SELECT_NUM_FINGERPRINTS = """ - SELECT COUNT(*) as n FROM %s - """ % (FINGERPRINTS_TABLENAME) - - SELECT_UNIQUE_SONG_IDS = """ - SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1; - """ % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED) - - SELECT_SONGS = """ - SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1; - """ % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, - SONGS_TABLENAME, FIELD_FINGERPRINTED) - - # drops - DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME - DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME - - # update - UPDATE_SONG_FINGERPRINTED = """ - UPDATE %s SET %s = 1 WHERE %s = %%s - """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID) - - # delete - DELETE_UNFINGERPRINTED = """ - DELETE FROM %s WHERE %s = 0; - """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED) - - def __init__(self, **options): - super(SQLDatabase, self).__init__() - self.cursor = cursor_factory(**options) - self._options = options - - def after_fork(self): - # Clear the cursor cache, we don't want any stale connections from - # the previous process. - Cursor.clear_cache() - - def setup(self): - """ - Creates any non-existing tables required for dejavu to function. - - This also removes all songs that have been added but have no - fingerprints associated with them. - """ - with self.cursor() as cur: - cur.execute(self.CREATE_SONGS_TABLE) - cur.execute(self.CREATE_FINGERPRINTS_TABLE) - cur.execute(self.DELETE_UNFINGERPRINTED) - - def empty(self): - """ - Drops tables created by dejavu and then creates them again - by calling `SQLDatabase.setup`. - - .. 
warning: - This will result in a loss of data - """ - with self.cursor() as cur: - cur.execute(self.DROP_FINGERPRINTS) - cur.execute(self.DROP_SONGS) - - self.setup() - - def delete_unfingerprinted_songs(self): - """ - Removes all songs that have no fingerprints associated with them. - """ - with self.cursor() as cur: - cur.execute(self.DELETE_UNFINGERPRINTED) - - def get_num_songs(self): - """ - Returns number of songs the database has fingerprinted. - """ - with self.cursor() as cur: - cur.execute(self.SELECT_UNIQUE_SONG_IDS) - - for count, in cur: - return count - return 0 - - def get_num_fingerprints(self): - """ - Returns number of fingerprints the database has fingerprinted. - """ - with self.cursor() as cur: - cur.execute(self.SELECT_NUM_FINGERPRINTS) - - for count, in cur: - return count - return 0 - - def set_song_fingerprinted(self, sid): - """ - Set the fingerprinted flag to TRUE (1) once a song has been completely - fingerprinted in the database. - """ - with self.cursor() as cur: - cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,)) - - def get_songs(self): - """ - Return songs that have the fingerprinted flag set TRUE (1). - """ - with self.cursor(cursor_type=DictCursor) as cur: - cur.execute(self.SELECT_SONGS) - for row in cur: - yield row - - def get_song_by_id(self, sid): - """ - Returns song by its ID. - """ - with self.cursor(cursor_type=DictCursor) as cur: - cur.execute(self.SELECT_SONG, (sid,)) - return cur.fetchone() - - def insert(self, hash, sid, offset): - """ - Insert a (sha1, song_id, offset) row into database. - """ - with self.cursor() as cur: - cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset)) - - def insert_song(self, songname, file_hash): - """ - Inserts song in the database and returns the ID of the inserted record. - """ - with self.cursor() as cur: - cur.execute(self.INSERT_SONG, (songname, file_hash)) - return cur.lastrowid - - def query(self, hash): - """ - Return all tuples associated with hash. 
- - If hash is None, returns all entries in the - database (be careful with that one!). - """ - # select all if no key - query = self.SELECT_ALL if hash is None else self.SELECT - - with self.cursor() as cur: - cur.execute(query) - for sid, offset in cur: - yield (sid, offset) - - def get_iterable_kv_pairs(self): - """ - Returns all tuples in database. - """ - return self.query(None) - - def insert_hashes(self, sid, hashes): - """ - Insert series of hash => song_id, offset - values into the database. - """ - values = [] - for hash, offset in hashes: - values.append((hash, sid, offset)) - - with self.cursor() as cur: - for split_values in grouper(values, 1000): - cur.executemany(self.INSERT_FINGERPRINT, split_values) - - def return_matches(self, hashes): - """ - Return the (song_id, offset_diff) tuples associated with - a list of (sha1, sample_offset) values. - """ - # Create a dictionary of hash => offset pairs for later lookups - mapper = {} - for hash, offset in hashes: - mapper[hash.upper()] = offset - - # Get an iteratable of all the hashes we need - values = mapper.keys() - - with self.cursor() as cur: - for split_values in grouper(values, 1000): - # Create our IN part of the query - query = self.SELECT_MULTIPLE - query = query % ', '.join(['UNHEX(%s)'] * len(split_values)) - - cur.execute(query, split_values) - - for hash, sid, offset in cur: - # (sid, db_offset - song_sampled_offset) - yield (sid, offset - mapper[hash]) - - def __getstate__(self): - return (self._options,) - - def __setstate__(self, state): - self._options, = state - self.cursor = cursor_factory(**self._options) + """ + Queries: + + 1) Find duplicates (shouldn't be any, though): + + select `hash`, `song_id`, `offset`, count(*) cnt + from fingerprints + group by `hash`, `song_id`, `offset` + having cnt > 1 + order by cnt asc; + + 2) Get number of hashes by song: + + select song_id, song_name, count(song_id) as num + from fingerprints + natural join songs + group by song_id + order by 
count(song_id) desc; + + 3) get hashes with highest number of collisions + + select + hash, + count(distinct song_id) as n + from fingerprints + group by `hash` + order by n DESC; + + => 26 different songs with same fingerprint (392 times): + + select songs.song_name, fingerprints.offset + from fingerprints natural join songs + where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73"; + """ + + type = "mysql" + + # tables + FINGERPRINTS_TABLENAME = "fingerprints" + SONGS_TABLENAME = "songs" + + # fields + FIELD_FINGERPRINTED = "fingerprinted" + + # creates + CREATE_FINGERPRINTS_TABLE = """ + CREATE TABLE IF NOT EXISTS `%s` ( + `%s` binary(10) not null, + `%s` mediumint unsigned not null, + `%s` int unsigned not null, + INDEX (%s), + UNIQUE KEY `unique_constraint` (%s, %s, %s), + FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE + ) ENGINE=INNODB;""" % ( + FINGERPRINTS_TABLENAME, Database.FIELD_HASH, + Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, + Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH, + Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID + ) + + CREATE_SONGS_TABLE = """ + CREATE TABLE IF NOT EXISTS `%s` ( + `%s` mediumint unsigned not null auto_increment, + `%s` varchar(250) not null, + `%s` tinyint default 0, + `%s` binary(20) not null, + PRIMARY KEY (`%s`), + UNIQUE KEY `%s` (`%s`) + ) ENGINE=INNODB;""" % ( + SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED, + Database.FIELD_FILE_SHA1, + Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, + ) + + # inserts (ignores duplicates) + INSERT_FINGERPRINT = """ + INSERT IGNORE INTO %s (%s, %s, %s) values + (UNHEX(%%s), %%s, %%s); + """ % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET) + + INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % ( + SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1) + + # selects + 
SELECT = """ + SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s); + """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH) + + SELECT_MULTIPLE = """ + SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s); + """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET, + FINGERPRINTS_TABLENAME, Database.FIELD_HASH) + + SELECT_ALL = """ + SELECT %s, %s FROM %s; + """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME) + + SELECT_SONG = """ + SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s; + """ % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID) + + SELECT_NUM_FINGERPRINTS = """ + SELECT COUNT(*) as n FROM %s + """ % (FINGERPRINTS_TABLENAME) + + SELECT_UNIQUE_SONG_IDS = """ + SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1; + """ % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED) + + SELECT_SONGS = """ + SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1; + """ % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, + SONGS_TABLENAME, FIELD_FINGERPRINTED) + + # drops + DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME + DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME + + # update + UPDATE_SONG_FINGERPRINTED = """ + UPDATE %s SET %s = 1 WHERE %s = %%s + """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID) + + # delete + DELETE_UNFINGERPRINTED = """ + DELETE FROM %s WHERE %s = 0; + """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED) + + def __init__(self, **options): + super(SQLDatabase, self).__init__() + self.cursor = cursor_factory(**options) + self._options = options + + def after_fork(self): + # Clear the cursor cache, we don't want any stale connections from + # the previous process. + Cursor.clear_cache() + + def setup(self): + """ + Creates any non-existing tables required for dejavu to function. 
+ + This also removes all songs that have been added but have no + fingerprints associated with them. + """ + with self.cursor() as cur: + cur.execute(self.CREATE_SONGS_TABLE) + cur.execute(self.CREATE_FINGERPRINTS_TABLE) + cur.execute(self.DELETE_UNFINGERPRINTED) + + def empty(self): + """ + Drops tables created by dejavu and then creates them again + by calling `SQLDatabase.setup`. + + .. warning: + This will result in a loss of data + """ + with self.cursor() as cur: + cur.execute(self.DROP_FINGERPRINTS) + cur.execute(self.DROP_SONGS) + + self.setup() + + def delete_unfingerprinted_songs(self): + """ + Removes all songs that have no fingerprints associated with them. + """ + with self.cursor() as cur: + cur.execute(self.DELETE_UNFINGERPRINTED) + + def get_num_songs(self): + """ + Returns number of songs the database has fingerprinted. + """ + with self.cursor() as cur: + cur.execute(self.SELECT_UNIQUE_SONG_IDS) + + for count, in cur: + return count + return 0 + + def get_num_fingerprints(self): + """ + Returns number of fingerprints the database has fingerprinted. + """ + with self.cursor() as cur: + cur.execute(self.SELECT_NUM_FINGERPRINTS) + + for count, in cur: + return count + return 0 + + def set_song_fingerprinted(self, sid): + """ + Set the fingerprinted flag to TRUE (1) once a song has been completely + fingerprinted in the database. + """ + with self.cursor() as cur: + cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,)) + + def get_songs(self): + """ + Return songs that have the fingerprinted flag set TRUE (1). + """ + with self.cursor(cursor_class=cursor.MySQLCursorDict) as cur: + cur.execute(self.SELECT_SONGS) + for row in cur: + yield row + + def get_song_by_id(self, sid): + """ + Returns song by its ID. + """ + with self.cursor(cursor_class=cursor.MySQLCursorDict) as cur: + cur.execute(self.SELECT_SONG, (sid,)) + return cur.fetchone() + + def insert(self, hash, sid, offset): + """ + Insert a (sha1, song_id, offset) row into database. 
+ """ + with self.cursor() as cur: + cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset)) + + def insert_song(self, songname, file_hash): + """ + Inserts song in the database and returns the ID of the inserted record. + """ + with self.cursor() as cur: + cur.execute(self.INSERT_SONG, (songname, file_hash)) + return cur.lastrowid + + def query(self, hash): + """ + Return all tuples associated with hash. + + If hash is None, returns all entries in the + database (be careful with that one!). + """ + # select all if no key + query = self.SELECT_ALL if hash is None else self.SELECT + + with self.cursor() as cur: + cur.execute(query) + for sid, offset in cur: + yield (sid, offset) + + def get_iterable_kv_pairs(self): + """ + Returns all tuples in database. + """ + return self.query(None) + + def insert_hashes(self, sid, hashes): + """ + Insert series of hash => song_id, offset + values into the database. + """ + values = [] + for hashit, offset in hashes: + values.append((hashit, int(sid), int(offset))) + + with self.cursor() as cur: + for split_values in grouper(values, 1000): + lst = list(split_values) + #print('split_vals', lst) + cur.executemany(self.INSERT_FINGERPRINT, lst) + + def return_matches(self, hashes): + """ + Return the (song_id, offset_diff) tuples associated with + a list of (sha1, sample_offset) values. 
+ """ + # Create a dictionary of hash => offset pairs for later lookups + mapper = {} + for hashit, offset in hashes: + mapper[hashit.upper()] = offset + + # Get an iteratable of all the hashes we need + values = mapper.keys() + + with self.cursor() as cur: + for split_values in grouper(values, 1000): + # Create our IN part of the query + query = self.SELECT_MULTIPLE + lstvals = list(split_values) + query = query % ', '.join(['UNHEX(%s)'] * len(lstvals)) + + cur.execute(query, lstvals) + + for hashit, sid, offset in cur: + # (sid, db_offset - song_sampled_offset) + yield (sid, offset - mapper[hashit]) + + def __getstate__(self): + return (self._options,) + + def __setstate__(self, state): + self._options, = state + self.cursor = cursor_factory(**self._options) def grouper(iterable, n, fillvalue=None): - args = [iter(iterable)] * n - return (filter(None, values) for values - in izip_longest(fillvalue=fillvalue, *args)) + args = [iter(iterable)] * n + return (filter(None, values) for values + in zip_longest(*args, fillvalue=fillvalue)) def cursor_factory(**factory_options): - def cursor(**options): - options.update(factory_options) - return Cursor(**options) - return cursor - - -class Cursor(object): - """ - Establishes a connection to the database and returns an open cursor. - - - ```python - # Use as context manager - with Cursor() as cur: - cur.execute(query) - ``` - """ - _cache = Queue.Queue(maxsize=5) - - def __init__(self, cursor_type=mysql.cursors.Cursor, **options): - super(Cursor, self).__init__() - - try: - conn = self._cache.get_nowait() - except Queue.Empty: - conn = mysql.connect(**options) - else: - # Ping the connection before using it from the cache. 
- conn.ping(True) - - self.conn = conn - self.conn.autocommit(False) - self.cursor_type = cursor_type - - @classmethod - def clear_cache(cls): - cls._cache = Queue.Queue(maxsize=5) - - def __enter__(self): - self.cursor = self.conn.cursor(self.cursor_type) - return self.cursor - - def __exit__(self, extype, exvalue, traceback): - # if we had a MySQL related error we try to rollback the cursor. - if extype is mysql.MySQLError: - self.cursor.rollback() - - self.cursor.close() - self.conn.commit() - - # Put it back on the queue - try: - self._cache.put_nowait(self.conn) - except Queue.Full: - self.conn.close() + def cursor(**options): + options.update(factory_options) + return Cursor(**options) + return cursor + + +class Cursor(): + """ + Establishes a connection to the database and returns an open cursor. + + + ```python + # Use as context manager + with Cursor() as cur: + cur.execute(query) + ``` + """ + _cache = queue.Queue(maxsize=5) + + def __init__(self, cursor_class=cursor.MySQLCursor, **options): + super(Cursor, self).__init__() + + try: + conn = self._cache.get_nowait() + except queue.Empty: + conn = connection.MySQLConnection(**options) + else: + # Ping the connection before using it from the cache. + conn.ping(True) + + self.conn = conn + #self.conn.autocommit(False) + if options is not None and 'cursor_class' in options: + cursor_class = options['cursor_class'] + self.cursor_class = cursor_class + + @classmethod + def clear_cache(cls): + cls._cache = queue.Queue(maxsize=5) + + def __enter__(self): + self.cursor = self.conn.cursor(cursor_class=self.cursor_class) + return self.cursor + + def __exit__(self, extype, exvalue, traceback): + # if we had a MySQL related error we try to rollback the cursor. 
+        if extype is not None:
+            self.conn.rollback()
+
+        self.cursor.close()
+        self.conn.commit()
+
+        # Put it back on the queue
+        try:
+            self._cache.put_nowait(self.conn)
+        except queue.Full:
+            self.conn.close()
diff --git a/dejavu/decoder.py b/dejavu/decoder.py
index 04aa39f4..b5144f57 100755
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@@ -56,7 +56,7 @@ def read(filename, limit=None):
     data = np.fromstring(audiofile._data, np.int16)
 
     channels = []
-    for chn in xrange(audiofile.channels):
+    for chn in range(audiofile.channels):
         channels.append(data[chn::audiofile.channels])
 
     fs = audiofile.frame_rate
diff --git a/dejavu/fingerprint.py b/dejavu/fingerprint.py
index 4db321b5..fc0f22ba 100755
--- a/dejavu/fingerprint.py
+++ b/dejavu/fingerprint.py
@@ -3,7 +3,7 @@
 import matplotlib.pyplot as plt
 from scipy.ndimage.filters import maximum_filter
 from scipy.ndimage.morphology import (generate_binary_structure,
-                                      iterate_structure, binary_erosion)
+    iterate_structure, binary_erosion)
 import hashlib
 from operator import itemgetter
 
@@ -62,94 +62,96 @@ FINGERPRINT_REDUCTION = 20
 
 
 def fingerprint(channel_samples, Fs=DEFAULT_FS,
-                wsize=DEFAULT_WINDOW_SIZE,
-                wratio=DEFAULT_OVERLAP_RATIO,
-                fan_value=DEFAULT_FAN_VALUE,
-                amp_min=DEFAULT_AMP_MIN):
-    """
-    FFT the channel, log transform output, find local maxima, then return
-    locally sensitive hashes.
- """ - # FFT the signal and extract frequency components - arr2D = mlab.specgram( - channel_samples, - NFFT=wsize, - Fs=Fs, - window=mlab.window_hanning, - noverlap=int(wsize * wratio))[0] - - # apply log transform since specgram() returns linear array - arr2D = 10 * np.log10(arr2D) - arr2D[arr2D == -np.inf] = 0 # replace infs with zeros - - # find local maxima - local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min) - - # return hashes - return generate_hashes(local_maxima, fan_value=fan_value) + wsize=DEFAULT_WINDOW_SIZE, + wratio=DEFAULT_OVERLAP_RATIO, + fan_value=DEFAULT_FAN_VALUE, + amp_min=DEFAULT_AMP_MIN): + """ + FFT the channel, log transform output, find local maxima, then return + locally sensitive hashes. + """ + # FFT the signal and extract frequency components + arr2D = mlab.specgram( + channel_samples, + NFFT=wsize, + Fs=Fs, + window=mlab.window_hanning, + noverlap=int(wsize * wratio))[0] + + # apply log transform since specgram() returns linear array + arr2D = 10 * np.log10(arr2D) + arr2D[arr2D == -np.inf] = 0 # replace infs with zeros + + # find local maxima + local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min) + + # return hashes + return generate_hashes(local_maxima, fan_value=fan_value) def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN): - # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure - struct = generate_binary_structure(2, 1) - neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE) - - # find local maxima using our fliter shape - local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D - background = (arr2D == 0) - eroded_background = binary_erosion(background, structure=neighborhood, - border_value=1) - - # Boolean mask of arr2D with True at peaks - detected_peaks = local_max - eroded_background - - # extract peaks - amps = arr2D[detected_peaks] - j, i = np.where(detected_peaks) - - # filter 
peaks - amps = amps.flatten() - peaks = zip(i, j, amps) - peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp - - # get indices for frequency and time - frequency_idx = [x[1] for x in peaks_filtered] - time_idx = [x[0] for x in peaks_filtered] - - if plot: - # scatter of the peaks - fig, ax = plt.subplots() - ax.imshow(arr2D) - ax.scatter(time_idx, frequency_idx) - ax.set_xlabel('Time') - ax.set_ylabel('Frequency') - ax.set_title("Spectrogram") - plt.gca().invert_yaxis() - plt.show() - - return zip(frequency_idx, time_idx) + # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure + struct = generate_binary_structure(2, 1) + neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE) + + # find local maxima using our fliter shape + local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D + background = (arr2D == 0) + eroded_background = binary_erosion(background, structure=neighborhood, + border_value=1) + + # Boolean mask of arr2D with True at peaks + detected_peaks = local_max - eroded_background + + # extract peaks + amps = arr2D[detected_peaks] + j, i = np.where(detected_peaks) + + # filter peaks + amps = amps.flatten() + peaks = zip(i, j, amps) + peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp + + # get indices for frequency and time + frequency_idx = [x[1] for x in peaks_filtered] + time_idx = [x[0] for x in peaks_filtered] + + if plot: + # scatter of the peaks + fig, ax = plt.subplots() + ax.imshow(arr2D) + ax.scatter(time_idx, frequency_idx) + ax.set_xlabel('Time') + ax.set_ylabel('Frequency') + ax.set_title("Spectrogram") + plt.gca().invert_yaxis() + plt.show() + + return zip(frequency_idx, time_idx) def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE): - """ - Hash list structure: - sha1_hash[0:20] time_offset - [(e05b341a9b77a51fd26, 32), ... 
] - """ - if PEAK_SORT: - peaks.sort(key=itemgetter(1)) - - for i in range(len(peaks)): - for j in range(1, fan_value): - if (i + j) < len(peaks): - - freq1 = peaks[i][IDX_FREQ_I] - freq2 = peaks[i + j][IDX_FREQ_I] - t1 = peaks[i][IDX_TIME_J] - t2 = peaks[i + j][IDX_TIME_J] - t_delta = t2 - t1 - - if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA: - h = hashlib.sha1( - "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta))) - yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1) + """ + Hash list structure: + sha1_hash[0:20] time_offset + [(e05b341a9b77a51fd26, 32), ... ] + """ + if PEAK_SORT: + peaks = sorted(peaks, key=itemgetter(1)) + + lenPeaks = len(peaks) + #print("lenPeaks", lenPeaks) + for i in range(lenPeaks): + for j in range(1, fan_value): + if (i + j) < lenPeaks: + + freq1 = peaks[i][IDX_FREQ_I] + freq2 = peaks[i + j][IDX_FREQ_I] + t1 = peaks[i][IDX_TIME_J] + t2 = peaks[i + j][IDX_TIME_J] + t_delta = t2 - t1 + + if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA: + h = hashlib.sha1( + "{}|{}|{}".format(str(freq1), str(freq2), str(t_delta)).encode('utf-8')) + yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1) diff --git a/dejavu/recognize.py b/dejavu/recognize.py index b43a8791..d546aa01 100755 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -4,109 +4,109 @@ import pyaudio import time +class BaseRecognizer(): -class BaseRecognizer(object): + def __init__(self, dejavu): + self.dejavu = dejavu + self.Fs = fingerprint.DEFAULT_FS - def __init__(self, dejavu): - self.dejavu = dejavu - self.Fs = fingerprint.DEFAULT_FS + def _recognize(self, *data): + matches = [] + for d in data: + matches.extend(self.dejavu.find_matches(d, Fs=self.Fs)) - def _recognize(self, *data): - matches = [] - for d in data: - matches.extend(self.dejavu.find_matches(d, Fs=self.Fs)) - return self.dejavu.align_matches(matches) + return self.dejavu.align_matches(matches) - def recognize(self): - pass # base class does nothing + def recognize(self): + 
pass # base class does nothing class FileRecognizer(BaseRecognizer): - def __init__(self, dejavu): - super(FileRecognizer, self).__init__(dejavu) + def __init__(self, dejavu): + super(FileRecognizer, self).__init__(dejavu) - def recognize_file(self, filename): - frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit) + def recognize_file(self, filename): + frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit) - t = time.time() - match = self._recognize(*frames) - t = time.time() - t + t = time.time() + match = self._recognize(*frames) + t = time.time() - t - if match: - match['match_time'] = t + if match: + match['match_time'] = t - return match + return match - def recognize(self, filename): - return self.recognize_file(filename) + def recognize(self, filename): + return self.recognize_file(filename) class MicrophoneRecognizer(BaseRecognizer): - default_chunksize = 8192 - default_format = pyaudio.paInt16 - default_channels = 2 - default_samplerate = 44100 - - def __init__(self, dejavu): - super(MicrophoneRecognizer, self).__init__(dejavu) - self.audio = pyaudio.PyAudio() - self.stream = None - self.data = [] - self.channels = MicrophoneRecognizer.default_channels - self.chunksize = MicrophoneRecognizer.default_chunksize - self.samplerate = MicrophoneRecognizer.default_samplerate - self.recorded = False - - def start_recording(self, channels=default_channels, - samplerate=default_samplerate, - chunksize=default_chunksize): - self.chunksize = chunksize - self.channels = channels - self.recorded = False - self.samplerate = samplerate - - if self.stream: - self.stream.stop_stream() - self.stream.close() - - self.stream = self.audio.open( - format=self.default_format, - channels=channels, - rate=samplerate, - input=True, - frames_per_buffer=chunksize, - ) - - self.data = [[] for i in range(channels)] - - def process_recording(self): - data = self.stream.read(self.chunksize) - nums = np.fromstring(data, np.int16) - for c in 
range(self.channels): - self.data[c].extend(nums[c::self.channels]) - - def stop_recording(self): - self.stream.stop_stream() - self.stream.close() - self.stream = None - self.recorded = True - - def recognize_recording(self): - if not self.recorded: - raise NoRecordingError("Recording was not complete/begun") - return self._recognize(*self.data) - - def get_recorded_time(self): - return len(self.data[0]) / self.rate - - def recognize(self, seconds=10): - self.start_recording() - for i in range(0, int(self.samplerate / self.chunksize - * seconds)): - self.process_recording() - self.stop_recording() - return self.recognize_recording() + default_chunksize = 8192 + default_format = pyaudio.paInt16 + default_channels = 2 + default_samplerate = 44100 + + def __init__(self, dejavu): + super(MicrophoneRecognizer, self).__init__(dejavu) + self.audio = pyaudio.PyAudio() + self.stream = None + self.data = [] + self.channels = MicrophoneRecognizer.default_channels + self.chunksize = MicrophoneRecognizer.default_chunksize + self.samplerate = MicrophoneRecognizer.default_samplerate + self.recorded = False + + def start_recording(self, channels=default_channels, + samplerate=default_samplerate, + chunksize=default_chunksize): + self.chunksize = chunksize + self.channels = channels + self.recorded = False + self.samplerate = samplerate + + if self.stream: + self.stream.stop_stream() + self.stream.close() + + self.stream = self.audio.open( + format=self.default_format, + channels=channels, + rate=samplerate, + input=True, + frames_per_buffer=chunksize, + ) + + self.data = [[] for i in range(channels)] + + def process_recording(self): + data = self.stream.read(self.chunksize) + nums = np.fromstring(data, np.int16) + for c in range(self.channels): + self.data[c].extend(nums[c::self.channels]) + + def stop_recording(self): + self.stream.stop_stream() + self.stream.close() + self.stream = None + self.recorded = True + + def recognize_recording(self): + if not self.recorded: + raise 
NoRecordingError("Recording was not complete/begun")
+        return self._recognize(*self.data)
+
+    def get_recorded_time(self):
+        return len(self.data[0]) / self.samplerate
+
+    def recognize(self, seconds=10):
+        self.start_recording()
+        for i in range(0, int(self.samplerate / self.chunksize
+                              * seconds)):
+            self.process_recording()
+        self.stop_recording()
+        return self.recognize_recording()
 
 
 class NoRecordingError(Exception):
-    pass
+    pass

From 162c1f27715d019ea7bcf80e9705a09ba1678fda Mon Sep 17 00:00:00 2001
From: David Dietrich
Date: Sat, 3 Jun 2017 12:11:45 -0700
Subject: [PATCH 2/4] Upgraded to Python 3.

---
 example.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/example.py b/example.py
index 1c99e69c..b9023dd2 100755
--- a/example.py
+++ b/example.py
@@ -6,7 +6,7 @@
 from dejavu.recognize import FileRecognizer, MicrophoneRecognizer
 
 # load config from a JSON file (or anything outputting a python dictionary)
-with open("dejavu.cnf.SAMPLE") as f:
+with open("dejavu.cnf") as f:
     config = json.load(f)
 
 if __name__ == '__main__':
@@ -19,17 +19,17 @@
 
     # Recognize audio from a file
     song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
-    print "From file we recognized: %s\n" % song
+    print("From file we recognized: {}\n".format(song))
 
     # Or recognize audio from your microphone for `secs` seconds
-    secs = 5
-    song = djv.recognize(MicrophoneRecognizer, seconds=secs)
-    if song is None:
-        print "Nothing recognized -- did you play the song out loud so your mic could hear it? :)"
-    else:
-        print "From mic with %d seconds we recognized: %s\n" % (secs, song)
+#    secs = 5
+#    song = djv.recognize(MicrophoneRecognizer, seconds=secs)
+#    if song is None:
+#        print("Nothing recognized -- did you play the song out loud so your mic could hear it? 
:)") +# else: +# print("From mic with %d seconds we recognized: %s\n" % (secs, song)) # Or use a recognizer without the shortcut, in anyway you would like - recognizer = FileRecognizer(djv) - song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3") - print "No shortcut, we recognized: %s\n" % song \ No newline at end of file +# recognizer = FileRecognizer(djv) +# song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3") +# print("No shortcut, we recognized: {}\n".format(song)) \ No newline at end of file From 9047441de9b32979944e3ea058765e4db9d31e97 Mon Sep 17 00:00:00 2001 From: David Dietrich Date: Sun, 4 Jun 2017 04:11:35 -0700 Subject: [PATCH 3/4] grouper now returning a list instead of a filter() iterator. --- dejavu/database_sql.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dejavu/database_sql.py b/dejavu/database_sql.py index 2bd5adc0..85321d40 100755 --- a/dejavu/database_sql.py +++ b/dejavu/database_sql.py @@ -274,9 +274,8 @@ def insert_hashes(self, sid, hashes): with self.cursor() as cur: for split_values in grouper(values, 1000): - lst = list(split_values) - #print('split_vals', lst) - cur.executemany(self.INSERT_FINGERPRINT, lst) + #print('split_vals', split_values) + cur.executemany(self.INSERT_FINGERPRINT, split_values) def return_matches(self, hashes): """ @@ -295,10 +294,9 @@ def return_matches(self, hashes): for split_values in grouper(values, 1000): # Create our IN part of the query query = self.SELECT_MULTIPLE - lstvals = list(split_values) - query = query % ', '.join(['UNHEX(%s)'] * len(lstvals)) + query = query % ', '.join(['UNHEX(%s)'] * len(split_values)) - cur.execute(query, lstvals) + cur.execute(query, split_values) for hashit, sid, offset in cur: # (sid, db_offset - song_sampled_offset) @@ -314,8 +312,7 @@ def __setstate__(self, state): def grouper(iterable, n, fillvalue=None): args = [iter(iterable)] * n - return 
(filter(None, values) for values - in zip_longest(*args, fillvalue=fillvalue)) + return (list(filter(None, values)) for values in zip_longest(*args, fillvalue=fillvalue)) def cursor_factory(**factory_options): From b4f10e15fdf2b11c53800d7e55bb8d9f044c57dd Mon Sep 17 00:00:00 2001 From: David Dietrich Date: Sun, 4 Jun 2017 05:01:49 -0700 Subject: [PATCH 4/4] Updated to v0.2.0 and added maintainer email for this fork. --- requirements.txt | 1 + setup.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9478f734..c9df2f70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ PyAudio>=0.2.7 numpy>=1.8.2 scipy>=0.12.1 matplotlib>=1.3.1 +mysql-connector-python>=1.1 ### END ### diff --git a/setup.py b/setup.py index 8312d1d5..72fc5e8e 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def parse_requirements(requirements): return reqs PACKAGE_NAME = "PyDejavu" -PACKAGE_VERSION = "0.1.3" +PACKAGE_VERSION = "0.2.0" SUMMARY = 'Dejavu: Audio Fingerprinting in Python' DESCRIPTION = """ Audio fingerprinting and recognition algorithm implemented in Python @@ -40,9 +40,9 @@ def parse_requirements(requirements): long_description=DESCRIPTION, author='Will Drevo', author_email='will.drevo@gmail.com', - maintainer="Will Drevo", - maintainer_email="will.drevo@gmail.com", - url='http://github.com/tuxdna/dejavu', + maintainer="David Dietrich", + maintainer_email='"David Dietrich" ', + url='http://github.com/grayarrow/dejavu', license='MIT License', include_package_data=True, packages=find_packages(),