如何解决Docker - 无法将容器内生成的文件从中复制出来
我在使用 GPU 的服务器上的 docker 容器内生成了几个包含文本统计信息的 .txt
文件。进程运行约 25 小时后正常结束,但其中一部分生成的文件无法从容器中复制出来,而其他生成的文件却可以正常复制。
from collections import Counter,defaultdict
import zlib
import re
import numpy as np
import string
import binascii
from tqdm import tqdm
import stanza
# Common English stop words to drop from token streams before indexing.
STOP_WORDS = set(
    "a an and are as at be but by for if in into is it no not of on or "
    "such that the their then there these they this to was will with".split()
)

# Fetch the English stanza models (one-time download), then build a pipeline
# that tokenizes, expands multi-word tokens, POS-tags and lemmatizes.
stanza.download("en")
nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma')
for filename in ["dataset.csv"]:
    documents = []
    # ...
    # Code here that generates and stores a document
    # that can be copied out
    # ...

    # Build three parallel index files:
    #   f  -> one token per line; the line number is the token's integer id
    #   f2 -> per token id, the comma-separated doc ids containing it
    #   f3 -> per token id, the comma-separated in-doc frequencies
    next_id = 0          # next unused token id
    token_to_id = {}     # token -> integer id, assigned in first-seen order
    with open(f"{basename}_token_to_id_lemmatized.txt", "w") as f, \
         open(f"{basename}_token_to_docs_lemmatized.txt", "w") as f2, \
         open(f"{basename}_token_to_freqs_lemmatized.txt", "w") as f3:
        # Process token ids range-by-range so only one range's postings are
        # held in memory at a time (trick to reduce memory-in-use).
        for lo, hi in [(0, 1000), (1000, 2000), (2000, 3000),
                       (3000, 10000), (10000, 20000), (20000, 1000000)]:
            token_to_docs = defaultdict(list)
            for doc_id, doc in enumerate(tqdm(documents)):
                for token, num_token in Counter(doc.split("|")).items():
                    # `dictionary` is built by the elided code above;
                    # skip empty tokens and out-of-vocabulary tokens.
                    if not token or token not in dictionary:
                        continue
                    # BUG FIX: the original test was
                    # `if not token_to_id.get(token)`, which is also true when
                    # the stored id is 0 (falsy). The token with id 0 was
                    # therefore re-registered — and re-written to f — on every
                    # occurrence, corrupting the id mapping. Use a membership
                    # test instead.
                    if token not in token_to_id:
                        token_to_id[token] = next_id
                        f.write(f"{token}\n")
                        next_id += 1
                    token_id = token_to_id[token]
                    if lo <= token_id < hi:
                        token_to_docs[token_id].append((doc_id, num_token))
            # Flush this range's postings. Note: ids with no postings emit
            # no newline, matching the original output format.
            for token_id in tqdm(range(lo, hi)):
                for doc_id, num_token in token_to_docs[token_id]:
                    f2.write(f"{doc_id},")
                    f3.write(f"{num_token},")
                if token_to_docs[token_id]:
                    f2.write("\n")
                    f3.write("\n")
我尝试在控制台中用以下命令把文件从容器中复制出来:
docker cp container_id:/container_workdir/only_premise_dataset_token_to_id_lemmatized.txt /destination
这是我的 Dockerfile:
FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04

RUN apt-get update && apt-get install -y python3 python3-pip git build-essential libssl-dev libffi-dev #libcupti-dev

# Instruction keywords normalized to the conventional uppercase form
# (the original had the garbled `workdir` / `copY`).
WORKDIR /container_workdir

COPY requirements.txt ./

RUN pip3 install --upgrade pip
RUN pip3 install --upgrade setuptools
# FIX: use pip3 consistently — the image only installs python3-pip, so a bare
# `pip` may be missing or point at a different interpreter than the one that
# runs the code.
RUN pip3 install -r requirements.txt

COPY . .

# ENV persists into every later build step and into the running container;
# the original extra `RUN export CUDA_VISIBLE_DEVICES=7` was a no-op (a shell
# export only lives for that single RUN step) and has been removed.
ENV CUDA_VISIBLE_DEVICES=7

# Exec form avoids the implicit `/bin/sh -c` wrapper, so signals reach bash
# directly.
CMD ["bash"]
请注意,我还尝试将文件重新存储为 .csv
和 .tsv
以及容器内的 pickle
。这些方法都没有奏效。
Error: No such container:path: container_id:/container_workdir/only_premise_dataset_token_to_id_lemmatized.txt
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。