8000 fix: fix group by thread query performance issues by rpcross · Pull Request #3926 · ietf-tools/mailarchive · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

fix: fix group by thread query performance issues #3926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions backend/mlarchive/archive/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

THREAD_SORT_FIELDS = ('-thread__date', 'thread_id', 'thread_order')
DATE_PATTERN = re.compile(r'(?P<year>\d{4})(?:-(?P<month>\d{2}))?')
BROWSE_GBT_PATTERN = re.compile(r'^/arch/browse/[^/]+/\?.*?\bgbt=1\b')
TimePeriod = namedtuple('TimePeriod', 'year, month')

# --------------------------------------------------
Expand Down Expand Up @@ -370,6 +371,20 @@ def build_page(self):
except InvalidPage:
raise Http404("No such page!")

# AVOID LOW LIMIT VALUES WHEN GROUPING BY THREAD
# the Django paginator will set LIMIT = message count if it is less
# than results_per_page. The problem is the low LIMIT value (<12)
# causes Postgres query planner to choose a very inefficient query plan
# for certain types of queries. So set query.high_mark, which is
# used to set limit, back to results_per_page.
if (
BROWSE_GBT_PATTERN.match(self.request.get_full_path())
and hasattr(page, 'object_list')
and hasattr(page.object_list, 'query')
and page.object_list.query.high_mark < self.results_per_page
):
page.object_list.query.high_mark = self.results_per_page
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will results_per_page always be greater than 12? Would it work to use max(self.results_per_page, 12) here, or will that break the pagination?

If it breaks the pagination, then we should probably put this fix in and plan to restrict the results_per_page to >= 12.


return (paginator, page)

def get_query(self):
Expand Down
23 changes: 21 additions & 2 deletions backend/mlarchive/tests/archive/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
from django.utils.http import urlencode
from django.utils.encoding import smart_str
from factories import EmailListFactory, MessageFactory, UserFactory, SubscriberFactory
from mlarchive.archive.models import Message, Attachment, Redirect
from mlarchive.archive.models import Message, Attachment, Redirect, EmailList
from mlarchive.archive.views import (TimePeriod, add_nav_urls, is_small_year,
get_this_next_periods, get_date_endpoints, get_thread_endpoints, DateStaticIndexView)
get_this_next_periods, get_date_endpoints, get_thread_endpoints, DateStaticIndexView,
CustomBrowseView)
from mlarchive.utils.test_utils import login_testing_unauthorized
from mlarchive.utils.test_utils import load_message

Expand Down Expand Up @@ -272,6 +273,24 @@ def test_browse_gbt(client, messages):
assert [r.pk for r in response.context['results']] == [m.pk for m in messages]


@pytest.mark.django_db(transaction=True)
def test_browse_gbt_build_page(client, messages):
'''Test that query.high_mark is raised to results_per_page to
avoid slow queries with low LIMIT
'''
email_list = EmailList.objects.get(name='apple')
url = reverse('archive_browse_list', kwargs={'list_name': 'apple'}) + '?gbt=1'
request = RequestFactory().get(url)
view = CustomBrowseView()
view.setup(request, list_name='apple', email_list=email_list)
_ = view.get(request, list_name='apple', email_list=email_list)
paginator, page = view.build_page()
count = email_list.message_set.count()
assert page.object_list.query.high_mark > count
assert page.object_list.query.high_mark == view.results_per_page
assert f'LIMIT {view.results_per_page}' in str(page.object_list.query)


@pytest.mark.django_db(transaction=True)
def test_browse_list_sort_subject(client, messages):
url = reverse('archive_browse_list', kwargs={'list_name': 'pubone'}) + '?so=subject'
Expand Down
0