From 3f0cf97ecd021fe8e09b8f78f35ac61ccb547a5f Mon Sep 17 00:00:00 2001 From: Nehal Patel <1161604+habemus-papadum@users.noreply.github.com> Date: Wed, 5 Jul 2023 09:59:29 -0400 Subject: [PATCH 1/2] Move punkt download to models.py This allows for `punkt` to be cached in the gerev docker images along with the HF models --- app/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/models.py b/app/models.py index bf27efc4..b426cb2d 100644 --- a/app/models.py +++ b/app/models.py @@ -1,7 +1,7 @@ from sentence_transformers import SentenceTransformer, CrossEncoder from transformers import pipeline import torch - +import nltk bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1') @@ -9,3 +9,5 @@ cross_encoder_large = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2') qa_model = pipeline('question-answering', model='deepset/roberta-base-squad2') + +nltk.download('punkt') From dc3c1358ba67e91d945e5c5ee14509960ab07671 Mon Sep 17 00:00:00 2001 From: Nehal Patel <1161604+habemus-papadum@users.noreply.github.com> Date: Wed, 5 Jul 2023 10:02:02 -0400 Subject: [PATCH 2/2] Location of `punkt` download has changed --- app/search_logic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/search_logic.py b/app/search_logic.py index 34673eaf..b1595c34 100644 --- a/app/search_logic.py +++ b/app/search_logic.py @@ -25,7 +25,6 @@ BI_ENCODER_CANDIDATES = 60 if torch.cuda.is_available() else 20 SMALL_CROSS_ENCODER_CANDIDATES = 30 if torch.cuda.is_available() else 10 -nltk.download('punkt') logger = logging.getLogger(__name__)