微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

网站抓取html类标识问题

如何解决网站抓取html类标识问题

我想为字符imgurl抓取MAL,以便以后使用毕加索和字符名称显示,但是在访问正确的类文件时遇到了麻烦。 这是正在检查的网站。 TYSM提前。 Inspect MAL

这是我的CharacterList类,用于标识HTML类

private RecyclerView recyclerView;
private ParseAdapter adapter;
private ArrayList<ParseItem> parseItems = new ArrayList<>();
private ProgressBar progressBar;

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_to_watch_list);

    configureBackButton();

    progressBar = findViewById(R.id.progressBar);
    recyclerView = findViewById(R.id.recyclerView_character);
    recyclerView.setHasFixedSize(true);
    recyclerView.setLayoutManager(new linearlayoutmanager(this));
    adapter = new ParseAdapter(parseItems,this);
    recyclerView.setAdapter(adapter);

    Content content = new Content();
    content.execute();

}

private class Content extends AsyncTask<Void,Void,Void>{

    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setAnimation(AnimationUtils.loadAnimation(CharacterList.this,android.R.anim.fade_in));
    }

    @Override
    protected void onPostExecute(Void aVoid) {
        super.onPostExecute(aVoid);
        progressBar.setVisibility(View.GONE);
        progressBar.setAnimation(AnimationUtils.loadAnimation(CharacterList.this,android.R.anim.fade_out));
        adapter.notifyDataSetChanged();
    }

    @Override
    protected void onCancelled() {
        super.onCancelled();
    }

    @Override
    protected Void doInBackground(Void... voids) {
        try{
            //website url
            String url = "https://myanimelist.net/character.PHP";
            Document doc = Jsoup.connect(url).get();
           
            Elements data = doc.select("tr.people");
            int size = data.size();
            for (int i = 0; i < size; i++){ 
                String imgurl = data.select("a.mr8.ml12.fl-l")
                        .select("img")
                        .eq(i)
                        .attr("src");

                String title = data.select("tr.mt24.di-ib.information")
                        .select("a.fw-b.fs14")
                        .eq(i)
                        .text();

                parseItems.add(new ParseItem(imgurl,title));
                Log.d("items","img: " +imgurl + " .title: " + title);
            }

        } catch (IOException e) {
            e.printstacktrace();
        }
        return null;
    }
}

我相信我的ParseAdapter和ParseItem可以正常工作,但是以防万一,在这里

public class ParseAdapter extends RecyclerView.Adapter<ParseAdapter.ViewHolder> {
private ArrayList<ParseItem> parseItems;
private Context context;
public ParseAdapter(ArrayList<ParseItem> parseItems,Context context){
    this.parseItems = parseItems;
    this.context = context;
}

@NonNull
@Override
public ParseAdapter.ViewHolder onCreateViewHolder(@NonNull ViewGroup parent,int viewType) {
    View view = LayoutInflater.from(parent.getContext()).inflate(R.layout.parse_item,parent,false);
    return new ViewHolder(view);
}

@Override
public void onBindViewHolder(@NonNull ParseAdapter.ViewHolder holder,int position) {
    ParseItem parseItem = parseItems.get(position);
    holder.textView.setText(parseItem.getTitle());
    Picasso
            //with vs get()
            .with(this.context)
            .load(parseItem.getimgurl())
            .into(holder.imageView);
}

@Override
public int getItemCount() {
    return parseItems.size();
}

public class ViewHolder extends RecyclerView.ViewHolder{

    ImageView imageView;
    TextView textView;

    public ViewHolder(@NonNull View itemView) {
        super(itemView);
        imageView = itemView.findViewById(R.id.imageView_character);
        textView = itemView.findViewById(R.id.textView_character);
    }
}
}



public class ParseItem {

private String imgurl;
private String title;

public ParseItem(){

}

public ParseItem(String imgurl,String title) {
    this.imgurl = imgurl;
    this.title = title;
}

public String getimgurl() {
    return imgurl;
}

public void setimgurl(String imgurl) {
    this.imgurl = imgurl;
}

public String getTitle() {
    return title;
}

public void setTitle(String title) {
    this.title = title;
}}

解决方法

我没想到。这是班级识别的部分,以防以后有人被卡住。

Elements data = doc.select("td.people");
            int size = data.size();
for (int i = 0; i < size; i++){
              
                String imgUrl = data.select("a.fl-l.ml12.mr8")
                       
                        .select("img")
                        .eq(i)
                    
                        .attr("data-src");

          
                String title = data.select("div.information.di-ib.mt24")
                
                        .select("a.fs14.fw-b")
                        .eq(i)
                        .text();

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。