如何解决网站抓取html类标识问题
我想从 MAL(MyAnimeList)抓取角色的图片 URL(imgurl)和角色名称,以便之后用 Picasso 显示,但在定位正确的 HTML 类(class)时遇到了麻烦。 这是我正在检查的页面。 提前非常感谢。 Inspect MAL
这是我的CharacterList类,用于标识HTML类
// --- Views looked up in onCreate ---

/** List widget that renders the scraped entries. */
private RecyclerView recyclerView;
/** Indicator made visible while the background task runs. */
private ProgressBar progressBar;

// --- Data shown by the list ---

/** Backing list handed to the adapter. */
private ArrayList<ParseItem> parseItems = new ArrayList<>();
/** Adapter that binds {@link #parseItems} to {@link #recyclerView}. */
private ParseAdapter adapter;
/**
 * Inflates the layout, wires the RecyclerView to its adapter and starts the
 * background task that fills the list.
 *
 * @param savedInstanceState state bundle passed through to the superclass
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_to_watch_list);
    configureBackButton();

    progressBar = findViewById(R.id.progressBar);
    recyclerView = findViewById(R.id.recyclerView_character);

    recyclerView.setHasFixedSize(true);
    // Java identifiers are case-sensitive: the layout manager class is
    // LinearLayoutManager, not "linearlayoutmanager".
    recyclerView.setLayoutManager(new LinearLayoutManager(this));

    adapter = new ParseAdapter(parseItems, this);
    recyclerView.setAdapter(adapter);

    // Start the scrape; the adapter is notified when the task finishes.
    Content content = new Content();
    content.execute();
}
/**
 * Background task that downloads the MyAnimeList character page and extracts
 * one {@link ParseItem} (image URL + character name) per matched entry.
 *
 * <p>Rows are collected into a task-local list in {@link #doInBackground} and
 * only appended to {@code parseItems} in {@link #onPostExecute}, so the list
 * backing the adapter is never modified from the background thread.
 */
private class Content extends AsyncTask<Void, Void, ArrayList<ParseItem>> {

    /** Shows the progress bar with a fade-in before the download starts. */
    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setAnimation(AnimationUtils.loadAnimation(CharacterList.this, android.R.anim.fade_in));
    }

    /**
     * Hides the progress bar, publishes the scraped rows to the adapter's
     * backing list and refreshes the list.
     *
     * @param scraped rows produced by {@link #doInBackground}; empty when the
     *                download failed
     */
    @Override
    protected void onPostExecute(ArrayList<ParseItem> scraped) {
        super.onPostExecute(scraped);
        progressBar.setVisibility(View.GONE);
        progressBar.setAnimation(AnimationUtils.loadAnimation(CharacterList.this, android.R.anim.fade_out));
        parseItems.addAll(scraped);
        adapter.notifyDataSetChanged();
    }

    /**
     * Fetches the character page and parses image URL and name for each entry.
     *
     * @param voids unused
     * @return the parsed rows; empty if the page could not be fetched
     */
    @Override
    protected ArrayList<ParseItem> doInBackground(Void... voids) {
        ArrayList<ParseItem> scraped = new ArrayList<>();
        try {
            // website url
            String url = "https://myanimelist.net/character.php";
            Document doc = Jsoup.connect(url).get();

            // Each entry is matched by a <td class="people"> cell.
            Elements data = doc.select("td.people");

            // Run the selections once instead of on every loop iteration.
            Elements images = data.select("a.fl-l.ml12.mr8").select("img");
            Elements names = data.select("div.information.di-ib.mt24").select("a.fs14.fw-b");

            int size = data.size();
            for (int i = 0; i < size; i++) {
                // The URL is read from "data-src" rather than "src"
                // (presumably the page lazy-loads images — re-check if the markup changes).
                String imgurl = images.eq(i).attr("data-src");
                String title = names.eq(i).text();
                scraped.add(new ParseItem(imgurl, title));
                Log.d("items", "img: " + imgurl + " .title: " + title);
            }
        } catch (IOException e) {
            // Keep whatever was parsed so far and record the failure with its stack trace.
            Log.e("items", "Failed to load character list", e);
        }
        return scraped;
    }
}
我相信我的ParseAdapter和ParseItem可以正常工作,但是以防万一,在这里
/**
 * RecyclerView adapter that shows one {@link ParseItem} per row: the title in
 * a TextView and the image, loaded through Picasso, in an ImageView.
 */
public class ParseAdapter extends RecyclerView.Adapter<ParseAdapter.ViewHolder> {

    private ArrayList<ParseItem> parseItems;
    private Context context;

    /**
     * @param parseItems list of rows to display (kept by reference)
     * @param context    context handed to Picasso when loading images
     */
    public ParseAdapter(ArrayList<ParseItem> parseItems, Context context) {
        this.parseItems = parseItems;
        this.context = context;
    }

    /** Inflates the row layout and wraps it in a {@link ViewHolder}. */
    @NonNull
    @Override
    public ParseAdapter.ViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
        LayoutInflater inflater = LayoutInflater.from(parent.getContext());
        View row = inflater.inflate(R.layout.parse_item, parent, false);
        return new ViewHolder(row);
    }

    /** Binds the item at {@code position} to the given holder's views. */
    @Override
    public void onBindViewHolder(@NonNull ParseAdapter.ViewHolder holder, int position) {
        ParseItem item = parseItems.get(position);
        holder.textView.setText(item.getTitle());
        // Uses Picasso.with(context); newer Picasso releases expose get() instead.
        Picasso.with(context)
                .load(item.getimgurl())
                .into(holder.imageView);
    }

    /** @return number of rows currently in the backing list */
    @Override
    public int getItemCount() {
        return parseItems.size();
    }

    /** Holds the image and text views of a single row. */
    public class ViewHolder extends RecyclerView.ViewHolder {

        ImageView imageView;
        TextView textView;

        public ViewHolder(@NonNull View itemView) {
            super(itemView);
            textView = itemView.findViewById(R.id.textView_character);
            imageView = itemView.findViewById(R.id.imageView_character);
        }
    }
}
/**
 * Mutable value holder for one scraped entry: an image URL and a title.
 */
public class ParseItem {

    /** Image URL of the entry; {@code null} until set. */
    private String imgurl;
    /** Display title of the entry; {@code null} until set. */
    private String title;

    /** Creates an item with both fields unset ({@code null}). */
    public ParseItem() {
        this(null, null);
    }

    /**
     * @param imgurl image URL of the entry
     * @param title  display title of the entry
     */
    public ParseItem(String imgurl, String title) {
        this.imgurl = imgurl;
        this.title = title;
    }

    // --- title ---

    /** @return the display title */
    public String getTitle() {
        return this.title;
    }

    /** @param title new display title */
    public void setTitle(String title) {
        this.title = title;
    }

    // --- image URL ---

    /** @return the image URL */
    public String getimgurl() {
        return this.imgurl;
    }

    /** @param imgurl new image URL */
    public void setimgurl(String imgurl) {
        this.imgurl = imgurl;
    }
}
解决方法
我弄明白了。下面是识别 HTML 类(class)的那部分代码,以防以后有人遇到同样的问题。
// Select every <td> element carrying the CSS class "people".
Elements data = doc.select("td.people");
int size = data.size();
for (int i = 0; i < size; i++){
// Image URL: among all <img> elements inside the matched anchors, take the
// i-th one and read its "data-src" attribute (not "src").
String imgUrl = data.select("a.fl-l.ml12.mr8")
.select("img")
.eq(i)
.attr("data-src");
// Name: among all matched name links inside the "information" divs, take the
// i-th one and read its text.
String title = data.select("div.information.di-ib.mt24")
.select("a.fs14.fw-b")
.eq(i)
.text();
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。