Use beautifulsoup4 instead of beautifulscraper by Rafiot · Pull Request #28 · kgaughan/uwhoisd · GitHub

Use beautifulsoup4 instead of beautifulscraper #28

Merged · 1 commit · Aug 18, 2016
README (6 changes: 3 additions & 3 deletions)
```diff
@@ -3,7 +3,7 @@ uwhoisd
 
 .. image:: https://secure.travis-ci.org/kgaughan/uwhoisd.png?branch=master
    :width: 89px
-   :height: 13px
+   :height: 13px
    :target: http://travis-ci.org/kgaughan/uwhoisd
 
 A 'Universal WHOIS' proxy server: you query it for information about a
@@ -14,5 +14,5 @@ It is only intended for use with domain names currently, but could be
 generalised to work with other types of WHOIS server.
 
 The daemon comes with a scraper to pull WHOIS server information from IANA's
-root zone database at `tools/scraper.py`. This requires the `beautifulscraper
-<https://pypi.python.org/pypi/beautifulscraper>`_ package to run.
+root zone database at `tools/scraper.py`. This requires the `beautifulsoup4
+<https://pypi.python.org/pypi/beautifulsoup4>`_ package to run.
```
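The fetch-and-parse pattern the README now refers to looks roughly like this; the URL and CSS selector are the ones used in `tools/scraper.py`, the rest is an illustrative sketch:

```python
import requests
from bs4 import BeautifulSoup

# Fetch the IANA root zone database and parse it with the
# stdlib-backed 'html.parser', as tools/scraper.py does.
page = requests.get('http://www.iana.org/domains/root/db').text
soup = BeautifulSoup(page, 'html.parser')

# CSS selectors take over the navigation beautifulscraper used to do.
for link in soup.select('#tld-table .tld a'):
    print(link.get_text(), link.attrs.get('href'))
```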
requirements-dev.txt (4 changes: 4 additions & 0 deletions)
```diff
@@ -2,3 +2,7 @@
 
 # Documentation.
 Sphinx
+
+# scrape IANA
+beautifulsoup4
+requests
```
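With these two additions, the scraper's dependencies install alongside the docs tooling via `pip install -r requirements-dev.txt`.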
tools/scraper.py (27 changes: 16 additions & 11 deletions)
```diff
@@ -6,10 +6,14 @@
 import socket
 import sys
 import time
-import urlparse
 
-import beautifulscraper
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
+
+from bs4 import BeautifulSoup
+import requests
 
 ROOT_ZONE_DB = 'http://www.iana.org/domains/root/db'
 SLEEP = 0
```
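The try/except import keeps the script runnable on both Python 2 and 3, since `urljoin` moved from `urlparse` to `urllib.parse` in Python 3. A quick sketch of the behaviour both versions share, with a made-up href of the kind found in the TLD table:

```python
try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin      # Python 2

# Hypothetical relative href, resolved against the root zone DB URL.
print(urljoin('http://www.iana.org/domains/root/db',
              '/domains/root/db/example.html'))
# -> http://www.iana.org/domains/root/db/example.html
```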
```diff
@@ -23,14 +27,14 @@ def main():
     """
     logging.basicConfig(stream=sys.stderr, level=logging.INFO)
 
-    print '[overrides]'
+    print('[overrides]')
 
     logging.info("Scraping %s", ROOT_ZONE_DB)
-    scraper = beautifulscraper.BeautifulScraper()
-    body = scraper.go(ROOT_ZONE_DB)
+    zone_page = requests.get(ROOT_ZONE_DB).text
+    soup = BeautifulSoup(zone_page, 'html.parser')
 
     no_server = []
-    for link in body.select('#tld-table .tld a'):
+    for link in soup.select('#tld-table .tld a'):
         if 'href' not in link.attrs:
             continue
@@ -43,9 +47,10 @@
 
         time.sleep(SLEEP)
 
-        zone_url = urlparse.urljoin(ROOT_ZONE_DB, link.attrs['href'])
+        zone_url = urljoin(ROOT_ZONE_DB, link.attrs['href'])
         logging.info("Scraping %s", zone_url)
-        body = scraper.go(zone_url)
+        b = requests.get(zone_url).text
+        body = BeautifulSoup(b, 'html.parser')
 
         title = body.find('h1')
         if title is None:
```
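A note on the parser argument: 'html.parser' is the interpreter's built-in backend, so it adds no dependency beyond beautifulsoup4 itself. If parsing speed ever became a concern, bs4 can also drive lxml; a hypothetical variant that would require adding lxml to the requirements:

```python
soup = BeautifulSoup(zone_page, 'lxml')  # assumes the lxml package is installed
```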
```diff
@@ -55,7 +60,7 @@
         if len(title_parts) != 2:
             logging.info("Could not find TLD in '%s'", title)
             continue
-        ace_zone = title_parts[1].encode('idna').lower()
+        ace_zone = title_parts[1].encode('idna').decode().lower()
 
         whois_server_label = body.find('b', text='WHOIS Server:')
         whois_server = ''
```
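The added `.decode()` matters on Python 3, where `str.encode('idna')` returns `bytes`; without it the zone would be printed as `b'...'`. On Python 2 the result is already a `str`, so the extra `.decode()` is a harmless round-trip and the line works on both. A minimal illustration, using an arbitrary example label:

```python
# Python 3: encode() yields bytes; decode back to str before lower().
print(u'münchen'.encode('idna'))           # b'xn--mnchen-3ya'
print(u'münchen'.encode('idna').decode())  # 'xn--mnchen-3ya'
```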
```diff
@@ -76,10 +81,10 @@
             no_server.append(ace_zone)
         else:
             logging.info("WHOIS server for %s is %s", ace_zone, whois_server)
-            print '%s=%s' % (ace_zone, whois_server)
+            print('%s=%s' % (ace_zone, whois_server))
 
     for ace_zone in no_server:
-        print '; No record for %s' % ace_zone
+        print('; No record for %s' % ace_zone)
 
     logging.info("Done")
```
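For context, the script emits an INI-style `[overrides]` section on stdout. A hypothetical excerpt of the output (server names illustrative):

```
[overrides]
com=whois.verisign-grs.com
org=whois.pir.org
; No record for example
```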