如何解决在 spaCy 3.0 中加载管道时出错
更新到 spaCy 3.0.6 后,我无法加载两个已训练管道中的任何一个,尽管它们似乎都已正确安装:
================= Installed pipeline packages (spaCy v3.0.6) =================
ℹ spaCy installation:
/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/spacy
NAME SPACY VERSION
en_core_web_sm >=3.0.0,<3.1.0 3.0.0 ✔
en_core_web_trf >=3.0.0,<3.1.0 3.0.0 ✔
无论是使用 spacy.load() 还是将管道作为模块导入,都会发生这种情况(以下所有行的错误都相同):
nlp = spacy.load("en_core_web_trf")
nlp = spacy.load("en_core_web_sm")
import en_core_web_sm
nlp = en_core_web_sm.load()
import en_core_web_trf
nlp = en_core_web_trf.load()
我得到的错误如下:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-9-b38eb3aae320> in <module>
1 import en_core_web_trf
----> 2 nlp = en_core_web_trf.load()
~/anaconda3/envs/ml/lib/python3.8/site-packages/en_core_web_trf/__init__.py in load(**overrides)
8
9 def load(**overrides):
---> 10 return load_model_from_init_py(__file__,**overrides)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_init_py(init_file,vocab,disable,exclude,config)
514 if not model_path.exists():
515 raise IOError(Errors.E052.format(path=data_path))
--> 516 return load_model_from_path(
517 data_path,
518 vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_path(model_path,meta,config)
389 config_path = model_path / "config.cfg"
390 config = load_config(config_path,overrides=dict_to_dot(config))
--> 391 nlp = load_model_from_config(config,vocab=vocab,disable=disable,exclude=exclude)
392 return nlp.from_disk(model_path,exclude=exclude)
393
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_config(config,auto_fill,validate)
426 # registry,including custom subclasses provided via entry points
427 lang_cls = get_lang_class(nlp_config["lang"])
--> 428 nlp = lang_cls.from_config(
429 config,
430 vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in from_config(cls,config,validate)
1637 # then we would load them twice at runtime: once when we make from config,
1638 # and then again when we load from disk.
-> 1639 nlp = lang_cls(vocab=vocab,create_tokenizer=create_tokenizer,meta=meta)
1640 if after_creation is not None:
1641 nlp = after_creation(nlp)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in __init__(self,max_length,create_tokenizer,batch_size,**kwargs)
148 # points. The factory decorator applied to these functions takes care
149 # of the rest.
--> 150 util.registry._entry_point_factories.get_all()
151
152 self._config = DEFAULT_CONFIG.merge(self.default_config)
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_all(self)
106 result = {}
107 if self.entry_points:
--> 108 result.update(self.get_entry_points())
109 for keys,value in REGISTRY.items():
110 if len(self.namespace) == len(keys) - 1 and all(
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_entry_points(self)
121 result = {}
122 for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace,[]):
--> 123 result[entry_point.name] = entry_point.load()
124 return result
125
~/anaconda3/envs/ml/lib/python3.8/importlib/metadata.py in load(self)
75 """
76 match = self.pattern.match(self.value)
---> 77 module = import_module(match.group('module'))
78 attrs = filter(None,(match.group('attr') or '').split('.'))
79 return functools.reduce(getattr,attrs,module)
~/anaconda3/envs/ml/lib/python3.8/importlib/__init__.py in import_module(name,package)
125 break
126 level += 1
--> 127 return _bootstrap._gcd_import(name[level:],package,level)
128
129
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name,level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name,import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name,import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f,*args,**kwds)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name,import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _load_unlocked(spec)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap_external.py in exec_module(self,module)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f,**kwds)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/__init__.py in <module>
----> 1 from . import architectures
2 from . import annotation_setters
3 from . import span_getters
4 from .layers import TransformerModel
5 from .pipeline_component import Transformer,install_extensions
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/architectures.py in <module>
3 from thinc.types import Ragged,Floats2d
4 from spacy.tokens import Doc
----> 5 from .layers import TransformerModel,TransformerListener
6 from .layers import trfs2arrays,split_trf_batch
7 from .util import registry
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/__init__.py in <module>
----> 1 from .listener import TransformerListener
2 from .transformer_model import TransformerModel
3 from .split_trf import split_trf_batch
4 from .trfs2arrays import trfs2arrays
5
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/listener.py in <module>
2 from thinc.api import Model
3 from spacy.tokens import Doc
----> 4 from ..data_classes import TransformerData
5
6
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/data_classes.py in <module>
9 import srsly
10
---> 11 from .util import transpose_list
12 from .align import get_token_positions
13
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/util.py in <module>
2 from pathlib import Path
3 import random
----> 4 from transformers import AutoModel,AutoTokenizer
5 from transformers.tokenization_utils import BatchEncoding
6 from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/__init__.py in <module>
624
625 # Trainer
--> 626 from .trainer import Trainer
627 from .trainer_pt_utils import torch_distributed_zero_first
628 else:
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer.py in <module>
67 TrainerState,
68 )
---> 69 from .trainer_pt_utils import (
70 DistributedTensorGatherer,
71 SequentialDistributedSampler,
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer_pt_utils.py in <module>
38 SAVE_STATE_WARNING = ""
39 else:
---> 40 from torch.optim.lr_scheduler import SAVE_STATE_WARNING
41
42 logger = logging.get_logger(__name__)
ImportError: cannot import name 'SAVE_STATE_WARNING' from 'torch.optim.lr_scheduler' (/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/torch/optim/lr_scheduler.py)
将 Torch 从当前稳定版本 1.8.1 降级到 1.4.0 可以解决这个问题,但我不想这样做。
有替代的解决方案吗?
解决方法
这似乎已在较新版本的 transformers 中得到修复(https://github.com/huggingface/transformers/pull/8979)。尝试同时升级 transformers 和 spacy-transformers。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。