@@ -6,53 +6,10 @@ FROM rabidsheep55/python-base-eval-layer

WORKDIR /app

-# RUN mkdir /usr/share/nltk_data
-# RUN mkdir -p /usr/share/nltk_data/corpora /usr/share/nltk_data/models /usr/share/nltk_data/tokenizers
-
-# ARG NLTK_DATA=/usr/share/nltk_data
-
-# ENV NLTK_DATA=/usr/share/nltk_data
# Copy and install any packages/modules needed for your evaluation script.
COPY requirements.txt .
-# COPY brown_length .
-# COPY word_freqs .
-# COPY w2v .
-# RUN yum install -y wget unzip
RUN pip3 install -r requirements.txt

-# # Download NLTK data files
-# RUN wget -O /usr/share/nltk_data/corpora/wordnet.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip
-# RUN wget -O /usr/share/nltk_data/models/word2vec_sample.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip
-# RUN wget -O /usr/share/nltk_data/corpora/brown.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip
-# RUN wget -O /usr/share/nltk_data/corpora/stopwords.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
-# RUN wget -O /usr/share/nltk_data/tokenizers/punkt.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
-# RUN wget -O /usr/share/nltk_data/tokenizers/punkt_tab.zip https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip
-
-# # Unzip the downloaded files into the correct subfolders corresponding to NLTK requirements
-# RUN unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/
-# RUN unzip /usr/share/nltk_data/models/word2vec_sample.zip -d /usr/share/nltk_data/models/
-# RUN unzip /usr/share/nltk_data/corpora/brown.zip -d /usr/share/nltk_data/corpora/
-# RUN unzip /usr/share/nltk_data/corpora/stopwords.zip -d /usr/share/nltk_data/corpora/
-# RUN unzip /usr/share/nltk_data/tokenizers/punkt.zip -d /usr/share/nltk_data/tokenizers/
-# RUN unzip /usr/share/nltk_data/tokenizers/punkt_tab.zip -d /usr/share/nltk_data/tokenizers/
-
-# # Clean up zip files to reduce image size
-# RUN rm /usr/share/nltk_data/corpora/*.zip
-# RUN rm /usr/share/nltk_data/models/*.zip
-# RUN rm /usr/share/nltk_data/tokenizers/*.zip
-
-# Warning: these commands sometimes download corrupted zips, so it is better to wget each package from the main site
-# RUN python -m nltk.downloader wordnet
-# RUN python -m nltk.downloader word2vec_sample
-# RUN python -m nltk.downloader brown
-# RUN python -m nltk.downloader stopwords
-# RUN python -m nltk.downloader punkt
-# RUN python -m nltk.downloader punkt_tab
-
-# Copy the evaluation and testing scripts
-# COPY brown_length ./app/
-# COPY word_freqs ./app/
-# COPY w2v ./app/
COPY evaluation.py ./app/
COPY evaluation_tests.py ./app/

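For reference, the removed wget-based setup could be condensed into a single layer. A minimal sketch (not part of this commit), assuming the same gh-pages mirror, package list, and NLTK_DATA location as the removed lines, with wget and unzip installed as above:

ENV NLTK_DATA=/usr/share/nltk_data
# Fetch each NLTK package zip, unpack it into the subfolder NLTK expects
# (corpora/, models/, tokenizers/), and delete the zip in the same layer.
RUN for pkg in corpora/wordnet corpora/brown corpora/stopwords \
               models/word2vec_sample tokenizers/punkt tokenizers/punkt_tab; do \
      mkdir -p "$NLTK_DATA/${pkg%/*}" && \
      wget -q -O "/tmp/${pkg##*/}.zip" \
        "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/$pkg.zip" && \
      unzip -q -o "/tmp/${pkg##*/}.zip" -d "$NLTK_DATA/${pkg%/*}/" && \
      rm "/tmp/${pkg##*/}.zip"; \
    done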