app/Dockerfile: 28 additions & 24 deletions
@@ -15,34 +15,38 @@ COPY requirements.txt .
 COPY brown_length .
 COPY word_freqs .
 COPY w2v .
-# RUN apt-get update && apt-get install -y wget unzip
+RUN cat /etc/os-release
+RUN yum install -y wget unzip
 RUN pip3 install -r requirements.txt
 
-# # Download NLTK data files
-# RUN wget -O /usr/share/nltk_data/corpora/wordnet.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip
-# RUN wget -O /usr/share/nltk_data/models/word2vec_sample.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip
-# RUN wget -O /usr/share/nltk_data/corpora/brown.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip
-# RUN wget -O /usr/share/nltk_data/corpora/stopwords.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
-# RUN wget -O /usr/share/nltk_data/tokenizers/punkt.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
-
-# # Unzip the downloaded files into the correct subfolders
-# RUN unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/
-# RUN unzip /usr/share/nltk_data/models/word2vec_sample.zip -d /usr/share/nltk_data/models/
-# RUN unzip /usr/share/nltk_data/corpora/brown.zip -d /usr/share/nltk_data/corpora/
-# RUN unzip /usr/share/nltk_data/corpora/stopwords.zip -d /usr/share/nltk_data/corpora/
-# RUN unzip /usr/share/nltk_data/tokenizers/punkt.zip -d /usr/share/nltk_data/tokenizers/
-
-# # Clean up zip files to reduce image size
-# RUN rm /usr/share/nltk_data/corpora/*.zip
-# RUN rm /usr/share/nltk_data/models/*.zip
-# RUN rm /usr/share/nltk_data/tokenizers/*.zip
+# Download NLTK data files
+RUN wget -O /usr/share/nltk_data/corpora/wordnet.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip
+RUN wget -O /usr/share/nltk_data/models/word2vec_sample.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip
+RUN wget -O /usr/share/nltk_data/corpora/brown.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip
+RUN wget -O /usr/share/nltk_data/corpora/stopwords.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
+RUN wget -O /usr/share/nltk_data/tokenizers/punkt.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
+RUN wget -O /usr/share/nltk_data/tokenizers/punkt_tab.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip
+
+# Unzip the downloaded files into the correct subfolders
+RUN unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/
+RUN unzip /usr/share/nltk_data/models/word2vec_sample.zip -d /usr/share/nltk_data/models/
+RUN unzip /usr/share/nltk_data/corpora/brown.zip -d /usr/share/nltk_data/corpora/
+RUN unzip /usr/share/nltk_data/corpora/stopwords.zip -d /usr/share/nltk_data/corpora/
+RUN unzip /usr/share/nltk_data/tokenizers/punkt.zip -d /usr/share/nltk_data/tokenizers/
+RUN unzip /usr/share/nltk_data/tokenizers/punkt_tab.zip -d /usr/share/nltk_data/tokenizers/
+
+# Clean up zip files to reduce image size
+RUN rm /usr/share/nltk_data/corpora/*.zip
+RUN rm /usr/share/nltk_data/models/*.zip
+RUN rm /usr/share/nltk_data/tokenizers/*.zip
 
 # Warning: those commands sometimes download corrupted zips, so it is better to wget each package from the main site
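Since the trailing comment mentions corrupted zips: one possible hardening (a sketch only, not part of this change; it reuses the same package paths and URLs as the diff above) is to collapse the download/unzip/cleanup sequence into a single RUN layer and test each archive with `unzip -tq`, so a corrupted download fails the build immediately instead of surfacing later as an NLTK LookupError at runtime:

# Sketch only: same packages and URLs as above, in one layer. `set -e` aborts
# the build on the first failed download, integrity test, or extraction;
# `mkdir -p` is defensive in case a target directory does not exist yet.
RUN set -e; \
    for pkg in corpora/wordnet corpora/brown corpora/stopwords \
               models/word2vec_sample tokenizers/punkt tokenizers/punkt_tab; do \
        dir="/usr/share/nltk_data/$(dirname "$pkg")"; \
        mkdir -p "$dir"; \
        wget -O "/usr/share/nltk_data/$pkg.zip" \
            "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/$pkg.zip"; \
        unzip -tq "/usr/share/nltk_data/$pkg.zip"; \
        unzip "/usr/share/nltk_data/$pkg.zip" -d "$dir/"; \
        rm "/usr/share/nltk_data/$pkg.zip"; \
    done

A single layer also means the deleted zips never get baked into intermediate layers; the separate RUN rm lines in the diff above cannot actually shrink the image, because the zips still live in the earlier wget layers.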
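A related build-time smoke test (again only a sketch; it assumes python3 and the pip requirements are installed at this point in the build, and it relies on /usr/share/nltk_data being on NLTK's default Unix search path) is to load each text resource once, so a missing or broken package fails the build rather than the running app. word2vec_sample is left out of the check because it is loaded by file path rather than through an nltk.corpus reader:

# Sketch only: touch each resource once; any LookupError fails the build here.
RUN python3 -c "import nltk; from nltk.corpus import wordnet, brown, stopwords; \
wordnet.synsets('test'); brown.words(); stopwords.words('english'); \
nltk.word_tokenize('smoke test for punkt')"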