如何解决清理文本(去除 \n)并添加语言列时出现的错误:HTTPError: HTTP Error 403: Forbidden(Python)
我目前正在使用 Python 进行自然语言处理。目标是先将一些 PDF 转换为文本,然后对文本进行清理。正如标题所述,我在清理文本之后想检测每段文本的语言,为此使用了 TextBlob,下面是我的代码:
补充信息:在此单元格之前,我使用 PDFMiner 将 PDF 转换为文本。
def get_language(row):
    """Detect the language of a single row's cleaned text.

    Args:
        row: A record (here, one DataFrame row) that exposes the text to
            classify under the ``'clean_text'`` key.

    Returns:
        The value produced by ``TextBlob.detect_language()`` for that text.

    Note:
        ``detect_language()`` sends an HTTP request through TextBlob's
        translator on every call, so it needs network access and can raise
        ``urllib.error.HTTPError`` when the remote service rejects the
        request.
    """
    return TextBlob(row['clean_text']).detect_language()
# Flatten every line break in the extracted text into a single space.
df_cvs['clean_text'] = df_cvs['text'].str.replace('\n', ' ')

# Run language detection row by row; get_language is called once per row.
df_cvs.loc[:, 'language'] = df_cvs.apply(get_language, axis=1)

# Show the first two rows to inspect the result.
df_cvs.head(2)
Now the error is the following:
Error details below: **HTTPError: HTTP Error 403: Forbidden**
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-71-f02d06c663d3> in <module>
6
7 df_cvs['clean_text'] = df_cvs['text'].str.replace('\n',' ')
----> 8 df_cvs.loc[:,'language'] = df_cvs.apply(lambda row: get_language(row),axis=1)
9
10 df_cvs.head(2)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self,func,axis,raw,result_type,args,**kwds)
7546 kwds=kwds,
7547 )
-> 7548 return op.get_result()
7549
7550 def applymap(self,func) -> "DataFrame":
~\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
178 return self.apply_raw()
179
--> 180 return self.apply_standard()
181
182 def apply_empty_result(self):
~\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
269
270 def apply_standard(self):
--> 271 results,res_index = self.apply_series_generator()
272
273 # wrap results
~\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
298 for i,v in enumerate(series_gen):
299 # ignore SettingWithCopy here in case the user mutates
--> 300 results[i] = self.f(v)
301 if isinstance(results[i],ABCSeries):
302 # If we have a view on v,we need to make a copy because
<ipython-input-71-f02d06c663d3> in <lambda>(row)
6
7 df_cvs['clean_text'] = df_cvs['text'].str.replace('\n',' ')
----> 8 df_cvs.loc[:,'language'] = df_cvs.apply(lambda row: get_language(row),axis=1)
9
10 df_cvs.head(2)
<ipython-input-71-f02d06c663d3> in get_language(row)
3 def get_language(row):
4 b = TextBlob(row['clean_text'])
----> 5 return b.detect_language()
6
7 df_cvs['clean_text'] = df_cvs['text'].str.replace('\n',' ')
~\Anaconda3\lib\site-packages\textblob\blob.py in detect_language(self)
566 :rtype: str
567 """
--> 568 return self.translator.detect(self.raw)
569
570 def correct(self):
~\Anaconda3\lib\site-packages\textblob\translate.py in detect(self,source,host,type_)
70 data = {"q": source}
71 url = u'{url}&sl=auto&tk={tk}'.format(url=self.url,tk=_calculate_tk(source))
---> 72 response = self._request(url,host=host,type_=type_,data=data)
73 result,language = json.loads(response)
74 return language
~\Anaconda3\lib\site-packages\textblob\translate.py in _request(self,url,type_,data)
90 if host or type_:
91 req.set_proxy(host=host,type=type_)
---> 92 resp = request.urlopen(req)
93 content = resp.read()
94 return content.decode('utf-8')
~\Anaconda3\lib\urllib\request.py in urlopen(url,data,timeout,cafile,capath,cadefault,context)
220 else:
221 opener = _opener
--> 222 return opener.open(url,timeout)
223
224 def install_opener(opener):
~\Anaconda3\lib\urllib\request.py in open(self,fullurl,timeout)
529 for processor in self.process_response.get(protocol,[]):
530 meth = getattr(processor,meth_name)
--> 531 response = meth(req,response)
532
533 return response
~\Anaconda3\lib\urllib\request.py in http_response(self,request,response)
638 # request was successfully received,understood,and accepted.
639 if not (200 <= code < 300):
--> 640 response = self.parent.error(
641 'http',response,code,msg,hdrs)
642
~\Anaconda3\lib\urllib\request.py in error(self,proto,*args)
567 if http_err:
568 args = (dict,'default','http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes
~\Anaconda3\lib\urllib\request.py in _call_chain(self,chain,kind,meth_name,*args)
500 for handler in handlers:
501 func = getattr(handler,meth_name)
--> 502 result = func(*args)
503 if result is not None:
504 return result
~\Anaconda3\lib\urllib\request.py in http_error_default(self,req,fp,code,msg,hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self,req,fp,code,msg,hdrs):
--> 649 raise HTTPError(req.full_url,code,msg,hdrs,fp)
650
651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 403: Forbidden
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。