Commit e39b344

fix: Revert to nltk download in docker
1 parent 053d005 commit e39b344
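
For orientation, each RUN python -m nltk.downloader <package> step restored by this commit does roughly what the following Python calls do. This is a minimal sketch, not part of the commit: the package names come from the diff below, and the /usr/share/nltk_data target directory (the one the removed wget steps wrote to) is an assumption about where this image expects the data to live.

    import nltk

    # Packages restored via `python -m nltk.downloader` in the Dockerfile below.
    PACKAGES = ["wordnet", "word2vec_sample", "brown", "stopwords", "punkt", "punkt_tab"]

    for pkg in PACKAGES:
        # download_dir mirrors the /usr/share/nltk_data path used by the removed
        # wget/unzip steps; omit it to let NLTK pick its default data directory.
        nltk.download(pkg, download_dir="/usr/share/nltk_data")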

File tree

1 file changed: +24 -24 lines changed

app/Dockerfile

Lines changed: 24 additions & 24 deletions
@@ -18,34 +18,34 @@ COPY w2v .
 RUN yum install -y wget unzip
 RUN pip3 install -r requirements.txt
 
-# Download NLTK data files
-RUN wget -O /usr/share/nltk_data/corpora/wordnet.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip
-RUN wget -O /usr/share/nltk_data/models/word2vec_sample.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip
-RUN wget -O /usr/share/nltk_data/corpora/brown.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip
-RUN wget -O /usr/share/nltk_data/corpora/stopwords.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
-RUN wget -O /usr/share/nltk_data/tokenizers/punkt.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
-RUN wget -O /usr/share/nltk_data/tokenizers/punkt_tab.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip
+# # Download NLTK data files
+# RUN wget -O /usr/share/nltk_data/corpora/wordnet.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip
+# RUN wget -O /usr/share/nltk_data/models/word2vec_sample.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip
+# RUN wget -O /usr/share/nltk_data/corpora/brown.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip
+# RUN wget -O /usr/share/nltk_data/corpora/stopwords.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
+# RUN wget -O /usr/share/nltk_data/tokenizers/punkt.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
+# RUN wget -O /usr/share/nltk_data/tokenizers/punkt_tab.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip
 
-# Unzip the downloaded files into the correct subfolders corresponding to NLTK requirements
-RUN unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/
-RUN unzip /usr/share/nltk_data/models/word2vec_sample.zip -d /usr/share/nltk_data/models/
-RUN unzip /usr/share/nltk_data/corpora/brown.zip -d /usr/share/nltk_data/corpora/
-RUN unzip /usr/share/nltk_data/corpora/stopwords.zip -d /usr/share/nltk_data/corpora/
-RUN unzip /usr/share/nltk_data/tokenizers/punkt.zip -d /usr/share/nltk_data/tokenizers/
-RUN unzip /usr/share/nltk_data/tokenizers/punkt_tab.zip -d /usr/share/nltk_data/tokenizers/
+# # Unzip the downloaded files into the correct subfolders corresponding to NLTK requirements
+# RUN unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/
+# RUN unzip /usr/share/nltk_data/models/word2vec_sample.zip -d /usr/share/nltk_data/models/
+# RUN unzip /usr/share/nltk_data/corpora/brown.zip -d /usr/share/nltk_data/corpora/
+# RUN unzip /usr/share/nltk_data/corpora/stopwords.zip -d /usr/share/nltk_data/corpora/
+# RUN unzip /usr/share/nltk_data/tokenizers/punkt.zip -d /usr/share/nltk_data/tokenizers/
+# RUN unzip /usr/share/nltk_data/tokenizers/punkt_tab.zip -d /usr/share/nltk_data/tokenizers/
 
-# Clean up zip files to reduce image size
-RUN rm /usr/share/nltk_data/corpora/*.zip
-RUN rm /usr/share/nltk_data/models/*.zip
-RUN rm /usr/share/nltk_data/tokenizers/*.zip
+# # Clean up zip files to reduce image size
+# RUN rm /usr/share/nltk_data/corpora/*.zip
+# RUN rm /usr/share/nltk_data/models/*.zip
+# RUN rm /usr/share/nltk_data/tokenizers/*.zip
 
 # Warnings: those commands sometimes download corrupted zips, so it is better to wget each package from the main site
-# RUN python -m nltk.downloader wordnet
-# RUN python -m nltk.downloader word2vec_sample
-# RUN python -m nltk.downloader brown
-# RUN python -m nltk.downloader stopwords
-# RUN python -m nltk.downloader punkt
-# RUN python -m nltk.downloader punkt_tab
+RUN python -m nltk.downloader wordnet
+RUN python -m nltk.downloader word2vec_sample
+RUN python -m nltk.downloader brown
+RUN python -m nltk.downloader stopwords
+RUN python -m nltk.downloader punkt
+RUN python -m nltk.downloader punkt_tab
 
 # Copy the evaluation and testing scripts
 COPY brown_length ./app/
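
Not part of this commit, but the corrupted-download problem noted in the Dockerfile comment could in principle be caught at build time by asking NLTK to resolve each resource and failing the build on a LookupError. A hedged sketch, with resource paths inferred from the package names above (the exact paths are assumptions, not taken from the repository):

    import nltk

    # Resource paths inferred from the packages installed above; nltk.data.find()
    # raises LookupError if a resource cannot be located or read.
    RESOURCES = [
        "corpora/wordnet",
        "corpora/brown",
        "corpora/stopwords",
        "models/word2vec_sample",
        "tokenizers/punkt",
        "tokenizers/punkt_tab",
    ]

    for resource in RESOURCES:
        nltk.data.find(resource)
        print("ok:", resource)

Such a script could be copied into the image and run as a single RUN python step after the downloader lines.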
