kevincon · kevincon · Nov 25, 2018 · Nov 25, 2018 · Nov 25, 2018 · Nov 25, 2018
diff --git a/.gitignore b/.gitignore
@@ -37,3 +37,6 @@ venv
 last_match
 date_max
 log.txt
+
+# PyCharm
+.idea/
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,15 @@
+language: python
+dist: xenial
+python:
+  - "3.6"
+  - "3.7"
+
+branches:
+  only:
+    - master
+
+before_script:
+  - pip install tox tox-travis pipenv
+
+script:
+  - tox
diff --git a/Pipfile b/Pipfile
@@ -0,0 +1,16 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+dota2py = "==0.1.3"
+numpy = "==1.15.4"
+progressbar = "==2.5"
+pymongo = "==2.6.3"
+scikit-learn = "==0.20.0"
+Flask = "==0.10.1"
+matplotlib = "==3.0.1"
+
+[dev-packages]
+pytest = "==4.0.1"
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/README.md b/README.md
@@ -21,15 +21,9 @@ Everything has been tested to work on Mac OSX 10.8. To download our project and
 
 ### Dependencies
 
-#### VirtualEnv
+#### pipenv
 
-We use [VirtualEnv](http://www.virtualenv.org/en/latest/) to help facilitate getting setup on a new machine. There are [a number of ways of installing it](http://www.virtualenv.org/en/latest/virtualenv.html#installation), depending on your operating system.
-
-#### GFortran
-
-[GFortran](http://gcc.gnu.org/wiki/GFortranBinaries) is required to install scipy. If you're running Mac OSX, we recommend using [Homebrew](http://brew.sh/) to install GFortran via its gcc formula:
-
-    brew install gcc
+We use [pipenv](https://pipenv.readthedocs.io/en/latest/) to make it easier to get set up on a new machine. There are [a number of ways of installing it](https://pipenv.readthedocs.io/en/latest/#install-pipenv-today), depending on your operating system.
 
 #### MongoDB, Database Backup, and Environment Variables (optional for just running recommendation engine)
 
@@ -50,26 +44,18 @@ You may find it helpful to add these commands to your bash profile in your home
 
     git clone git@github.com:kevincon/dotaml.git
 
-### Initialize VirtualEnv
-
-From inside the repository root folder, initialize VirtualEnv by running:
+### Initialize pipenv
 
-    virtualenv venv
+From inside the repository root folder, initialize pipenv by running:
 
-This creates a new folder in the directory called "venv." You only need to do this once. Don't worry about ever accidentally adding this folder to the repository. There's an entry for it in the .gitignore file.
+    pipenv install
 
-Next, activate the VirtualEnv by running:
+Next, activate the virtual environment by running:
 
-    source venv/bin/activate
+    pipenv shell
 
-You should now see "(venv)" as part of your terminal prompt, indicating you are now inside your VirtualEnv. Note that closing the terminal window deactivates VirtualEnv, so you must run ```source venv/bin/activate``` each time you open a new terminal window for development.
+You should now see "(dotaml)" as part of your terminal prompt, indicating you are now inside your virtual environment. Note that closing the terminal window deactivates the virtual environment, so you must run `pipenv shell` each time you open a new terminal window for development.
 
-### Installing required packages
-
-Now that you're in VirtualEnv, run the following command to automatically install all of the Python modules that are required:
-
-    pip install -r requirements.txt
-
 ### Running the web app
 
 From the root folder of the project, run:
@@ -94,7 +80,7 @@ Feel free to submit a pull request if you are interested in continuing developme
 ```
 The MIT License (MIT)
 
-Copyright (c) 2015 Kevin Conley
+Copyright (c) 2015-2018 Kevin Conley
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in

diff --git a/app.py b/app.py
@@ -11,7 +11,7 @@
 #engine = Engine(D2LogisticRegression())
 
 def get_api_string(recommendations, prob):
-    recommendations = map(str, recommendations)
+    recommendations = list(map(str, recommendations))
     return json.dumps({'x': recommendations, 'prob_x': prob})
 
 @app.route("/")
@@ -26,13 +26,13 @@ def api():
     if len(my_team) == 1 and my_team[0] == '':
         my_team = []
     else:
-        my_team = map(int, my_team)
+        my_team = list(map(int, my_team))
 
     their_team = request.args['y'].split(',')
     if len(their_team) == 1 and their_team[0] == '':
         their_team = []
     else:
-        their_team = map(int, their_team)
+        their_team = list(map(int, their_team))
 
     prob_recommendation_pairs = engine.recommend(my_team, their_team)
     recommendations = [hero for prob, hero in prob_recommendation_pairs]

diff --git a/data_collection/dotabot.py b/data_collection/dotabot.py
@@ -133,7 +133,7 @@ def main(start_match_id):
             saved_id = int(f.readline())
             ans = False
             try:
-                ans = raw_input('Start at last_match %d? ' % saved_id)
+                ans = input(f'Start at last_match {saved_id}? ')
                 if ans in ['yes', 'y', 'Y', 'YES', 'Yes']:
                     ans = True
             except KeyboardInterrupt:
@@ -144,12 +144,12 @@ def main(start_match_id):
                         date_max = int(d.readline())
                     match_id = saved_id
                 except IOError:
-                    print 'Could not open date_max file, ignoring last_match value.'
+                    print('Could not open date_max file, ignoring last_match value.')
 
     except IOError:
        pass 
 
-    print 'OK, starting at match_id=%s' % match_id
+    print(f'OK, starting at match_id={match_id}')
 
     setup()
     main(match_id)
diff --git a/data_collection/dotabot2.py b/data_collection/dotabot2.py
@@ -9,7 +9,7 @@
 db = client[os.getenv('DOTABOT_DB_NAME', 'dotabot')]
 match_collection = db.matches
 
-logging.basicConfig(filename='/home/kcon/dota2project/log.txt')
+logging.basicConfig(filename='log.txt')
 logger = logging.getLogger('dotabot')
 
 def setup():

diff --git a/data_collection/util.py b/data_collection/util.py
@@ -1,6 +1,6 @@
 import smtplib, os
 from email.mime.text import MIMEText
-from email.Utils import formatdate
+from email.utils import formatdate
 from datetime import datetime
 from dota2py import data
 
@@ -9,7 +9,7 @@ def print_match_history(gmh_result):
     for match in gmh_result['matches']:
         match_id = match['match_id']
         start_time = datetime.fromtimestamp(int(match['start_time']))
-        print 'Match %d - %s' % (match_id, start_time)
+        print(f'Match {match_id} - {start_time}')
 
 def get_game_mode_string(game_mode_id):
     '''Return a human-readable string for a game_mode id.'''
@@ -20,8 +20,12 @@ def get_game_mode_string(game_mode_id):
 
 def send_email(body,
                subject='Quick Message From DOTA2 Python Script',
-               recipients=['kcon@stanford.edu', 'djperry@stanford.edu']):
+               recipients=None):
     '''Send an email.'''
+
+    if recipients is None:
+        recipients = ['kcon@stanford.edu', 'djperry@stanford.edu']
+
     # Credentials
     username = os.getenv('DOTABOT_USERNAME')
     hostname = os.getenv('DOTABOT_HOSTNAME')

diff --git a/engine.py b/engine.py
@@ -17,13 +17,13 @@ def main():
     my_team = [76, 54]
     their_team = [5, 15, 46, 91, 13]
 
-    print 'My Team: %s' % [get_hero_human_readable(hero_id) for hero_id in my_team]
-    print 'Their Team: %s' % [get_hero_human_readable(hero_id) for hero_id in their_team]
-    print 'Recommend:'
+    print(f'My Team: {[get_hero_human_readable(hero_id) for hero_id in my_team]}')
+    print(f'Their Team: {[get_hero_human_readable(hero_id) for hero_id in their_team]}')
+    print('Recommend:')
     #engine = Engine(D2KNearestNeighbors())
     engine = Engine(D2LogisticRegression())
     recommendations = engine.recommend(my_team, their_team)
-    print [(prob, get_hero_human_readable(hero)) for prob, hero in recommendations]
+    print([(prob, get_hero_human_readable(hero)) for prob, hero in recommendations])
 
 class Engine:
     def __init__(self, algorithm):

diff --git a/k_nearest_neighbors/evaluate_model_10000.pkl b/k_nearest_neighbors/evaluate_model_10000.pkl
diff --git a/k_nearest_neighbors/evaluate_model_51022.pkl b/k_nearest_neighbors/evaluate_model_51022.pkl
diff --git a/k_nearest_neighbors/evaluate_model_51022.tar.gz b/k_nearest_neighbors/evaluate_model_51022.tar.gz
diff --git a/k_nearest_neighbors/k_nearest_neighbors.py b/k_nearest_neighbors/k_nearest_neighbors.py
@@ -29,9 +29,9 @@ def __init__(self, model_root='k_nearest_neighbors'):
         recommend_path = os.path.join(model_root, 'recommend_models_%d.pkl' % TRAINING_SET_SIZE)
         evaluate_path = os.path.join(model_root, 'evaluate_model_%d.pkl' % TRAINING_SET_SIZE)
 
-        with open(recommend_path, 'r') as input_file:
+        with open(recommend_path, 'rb') as input_file:
             self.recommend_models = pickle.load(input_file)
-        with open(evaluate_path, 'r') as input_file:
+        with open(evaluate_path, 'rb') as input_file:
             self.evaluate_model = pickle.load(input_file)
 
     def transform(self, my_team, their_team):
@@ -40,7 +40,7 @@ def transform(self, my_team, their_team):
             X[hero_id - 1] = 1
         for hero_id in their_team:
             X[hero_id - 1 + NUM_HEROES] = 1
-        return X
+        return X.reshape(1, -1)
 
     def recommend(self, my_team, their_team, hero_candidates):
         '''Returns a list of (hero, probablility of winning with hero added) recommended to complete my_team.'''
@@ -62,7 +62,11 @@ def recommend(self, my_team, their_team, hero_candidates):
     def score(self, query):
         '''Score the query using the evaluation model, considering both radiant and dire teams.'''
         radiant_query = query
-        dire_query = np.concatenate((radiant_query[NUM_HEROES:NUM_FEATURES], radiant_query[0:NUM_HEROES]))
+
+        underlying_features = query[0]
+        dire_features = np.concatenate((underlying_features[NUM_HEROES:NUM_FEATURES], underlying_features[0:NUM_HEROES]))
+        dire_query = dire_features.reshape(1, -1)
+
         rad_prob = self.evaluate_model.predict_proba(radiant_query)[0][1]
         dire_prob = self.evaluate_model.predict_proba(dire_query)[0][0]
         return (rad_prob + dire_prob) / 2

diff --git a/k_nearest_neighbors/kfcv_d.py b/k_nearest_neighbors/kfcv_d.py
@@ -1,5 +1,5 @@
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn import cross_validation
+from sklearn.model_selection import cross_val_score, KFold
 import numpy as np
 from progressbar import ProgressBar, Bar, Percentage, FormatLabel, ETA
 
@@ -17,18 +17,23 @@ def poly_weights(distances):
 def score(estimator, X, y):
     global pbar, FOLDS_FINISHED
     correct_predictions = 0
-    for i, radiant_query in enumerate(X):
+    for i, radiant_features in enumerate(X):
         pbar.update(FOLDS_FINISHED)
-        dire_query = np.concatenate((radiant_query[NUM_HEROES:NUM_FEATURES], radiant_query[0:NUM_HEROES]))
+
+        radiant_query = radiant_features.reshape(1, -1)
         rad_prob = estimator.predict_proba(radiant_query)[0][1]
+
+        dire_features = np.concatenate((radiant_features[NUM_HEROES:NUM_FEATURES], radiant_features[0:NUM_HEROES]))
+        dire_query = dire_features.reshape(1, -1)
         dire_prob = estimator.predict_proba(dire_query)[0][0]
+
         overall_prob = (rad_prob + dire_prob) / 2
         prediction = 1 if (overall_prob > 0.5) else -1
         result = 1 if prediction == y[i] else 0
         correct_predictions += result
     FOLDS_FINISHED += 1
     accuracy = float(correct_predictions) / len(X)
-    print 'Accuracy: %f' % accuracy
+    print(f'Accuracy: {accuracy}')
     return accuracy
 
 NUM_HEROES = 108
@@ -45,9 +50,8 @@ def score(estimator, X, y):
 X = X[0:NUM_MATCHES]
 Y = Y[0:NUM_MATCHES]
 
-print 'Training using data from %d matches...' % NUM_MATCHES
-
-k_fold = cross_validation.KFold(n=NUM_MATCHES, n_folds=K, indices=True)
+print(f'Training using data from {NUM_MATCHES} matches...')
+kfold = KFold(K)
 
 d_tries = [3, 4, 5]
 
@@ -56,9 +60,9 @@ def score(estimator, X, y):
 
 d_accuracy_pairs = []
 for d_index, d in enumerate(d_tries):
-    model = KNeighborsClassifier(n_neighbors=NUM_MATCHES/K,metric=my_distance,weights=poly_param(d))
-    model_accuracies = cross_validation.cross_val_score(model, X, Y, scoring=score, cv=k_fold)
+    model = KNeighborsClassifier(n_neighbors=NUM_MATCHES//K,metric=my_distance,weights=poly_param(d))
+    model_accuracies = cross_val_score(model, X, Y, scoring=score, cv=kfold)
     model_accuracy = model_accuracies.mean()
     d_accuracy_pairs.append((d, model_accuracy))
 pbar.finish()
-print d_accuracy_pairs
+print(d_accuracy_pairs)
diff --git a/k_nearest_neighbors/preprocess.py b/k_nearest_neighbors/preprocess.py
@@ -49,7 +49,7 @@
 
 pbar.finish()
 
-print "Permuting, generating train and test sets."
+print("Permuting, generating train and test sets.")
 indices = np.random.permutation(NUM_MATCHES)
 test_indices = indices[0:NUM_MATCHES/10]
 train_indices = indices[NUM_MATCHES/10:NUM_MATCHES]
@@ -60,7 +60,7 @@
 X_train = X[train_indices]
 Y_train = Y[train_indices]
 
-print "Saving output file now..."
+print("Saving output file now...")
 np.savez_compressed('test_%d.npz' % len(test_indices), X=X_test, Y=Y_test)
 np.savez_compressed('train_%d.npz' % len(train_indices), X=X_train, Y=Y_train)
 
diff --git a/k_nearest_neighbors/recommend_models_10000.pkl b/k_nearest_neighbors/recommend_models_10000.pkl
diff --git a/k_nearest_neighbors/test.py b/k_nearest_neighbors/test.py
@@ -25,19 +25,24 @@ def poly_weights_evaluate(distances):
     return np.array([weights])
 
 def test():
-    with open('evaluate_model_51022.pkl', 'r') as input_file:
+    with open('evaluate_model_51022.pkl', 'rb') as input_file:
             model = pickle.load(input_file)
 
     widgets = [FormatLabel('Processed: %(value)d/%(max)d matches. '), ETA(), Percentage(), ' ', Bar()]
     pbar = ProgressBar(widgets=widgets, maxval=NUM_MATCHES).start()
 
     correct_predictions = 0
     Y_pred = np.zeros(NUM_MATCHES)
-    for i, radiant_query in enumerate(X):
+    for i, radiant_features in enumerate(X):
         pbar.update(i)
-        dire_query = np.concatenate((radiant_query[NUM_HEROES:NUM_FEATURES], radiant_query[0:NUM_HEROES]))
+
+        radiant_query = radiant_features.reshape(1, -1)
         rad_prob = model.predict_proba(radiant_query)[0][1]
+
+        dire_features = np.concatenate((radiant_features[NUM_HEROES:NUM_FEATURES], radiant_features[0:NUM_HEROES]))
+        dire_query = dire_features.reshape(1, -1)
         dire_prob = model.predict_proba(dire_query)[0][0]
+
         overall_prob = (rad_prob + dire_prob) / 2
         prediction = 1 if (overall_prob > 0.5) else -1
         Y_pred[i] = 1 if prediction == 1 else 0
@@ -49,24 +54,22 @@ def test():
     pbar.finish()
 
     accuracy = float(correct_predictions) / NUM_MATCHES
-    print 'Accuracy of KNN model: %f' % accuracy
+    print(f'Accuracy of KNN model: {accuracy}')
 
     # flip all -1 true labels to 0 for f1 scoring
     for i, match in enumerate(Y):
         if match == -1:
             Y[i] = 0
 
-    prec, recall, f1, support = precision_recall_fscore_support(Y, Y_pred, average='macro')
-    print 'Precision: ',prec
-    print 'Recall: ',recall
-    print 'F1 Score: ',f1
-    print 'Support: ',support
+    prec, recall, f1, _ = precision_recall_fscore_support(Y, Y_pred, average='binary')
+    print('Precision: ',prec)
+    print('Recall: ',recall)
+    print('F1 Score: ',f1)
 
     # Accuracy of KNN model: 0.678074
     # Precision:  0.764119601329
     # Recall:  0.673499267936
     # F1 Score:  0.715953307393
-    # Support:  3415
 
 if __name__ == '__main__':
     test()
diff --git a/k_nearest_neighbors/train_evaluate.py b/k_nearest_neighbors/train_evaluate.py
@@ -7,7 +7,7 @@
 X = preprocessed['X']
 Y = preprocessed['Y']
 
-relevant_indices = range(0, 10000)
+relevant_indices = slice(0, 10000)
 X = X[relevant_indices]
 Y = Y[relevant_indices]
 
@@ -23,10 +23,10 @@ def poly_weights_evaluate(distances):
 NUM_HEROES = 108
 NUM_MATCHES = len(X)
 
-print 'Training evaluation model using data from %d matches...' % NUM_MATCHES
+print(f'Training evaluation model using data from {NUM_MATCHES} matches...')
 
 model = KNeighborsClassifier(n_neighbors=NUM_MATCHES,metric=my_distance,weights=poly_weights_evaluate).fit(X, Y)
 
 # Populate model pickle
-with open('evaluate_model_%d.pkl' % NUM_MATCHES, 'w') as output_file:
+with open('evaluate_model_%d.pkl' % NUM_MATCHES, 'wb') as output_file:
     pickle.dump(model, output_file)