如何解决从多个网站抓取数据的最佳方法是什么?
我尝试过的是,我使用排球请求从第一个网站抓取,并且在其中为每个网站创建了多个线程,并且在每个线程内使用jsoup connect方法抓取而不是截击。它可以完成工作,实际上更快。但是,问题在于,它会在剪贴数据时冻结应用程序,直到完全加载为止。它冻结进度条,我在查找原因时遇到问题。
这是我实现的代码。有点长。
// Checking the connection
final StringRequest request = new StringRequest("https://www.google.com/",new Response.Listener<String>() {
@Override
public void onResponse(String response) {
relativeLayout.setVisibility(View.GONE);
// instances for each required website
final HimalayanTimes himalayanTimes = new HimalayanTimes(getContext());
final GsmArena gsmArena = new GsmArena();
final CinemaBlend cinemaBlend = new CinemaBlend();
final KathmanduPost kathmanduPost = new KathmanduPost(getContext());
final GlobalNews globalNews = new GlobalNews();
final NepaliTimes nepaliTimes = new NepaliTimes(getContext());
final GoalNepal goalNepal = new GoalNepal(getContext());
final GadgetByte gadgetByte = new GadgetByte();
final TechLekh techLekh = new TechLekh();
final OnlineKhabar onlineKhabar = new OnlineKhabar();
final NepaliSansar nepaliSansar = new NepaliSansar();
final CricketingNepal cricketingNepal = new CricketingNepal();
// thread for each website
// thread fot thehimalayantimes
Thread thread = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> himalyannews;
himalyannews = himalayanTimes.getNews();
news.addAll(himalyannews);
for(int i=0; i<4; i++){
finalHeadlines.add(himalyannews.get(i));
}
} catch (Exception ignored) {
}
}
});
thread.start();
// thread for gsmArena
Thread thread1 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> gsmarenanews;
gsmarenanews = gsmArena.getNews();
news.addAll(gsmarenanews);
for(int i=0; i<3; i++){
headlines.add(gsmarenanews.get(i));
}
} catch (Exception ignored) {
}
}
});
thread1.start();
// thread for cinemaBlend
Thread thread2 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> cinemablendnews;
cinemablendnews = cinemaBlend.getNews();
news.addAll(cinemablendnews);
for(int i=0; i<4; i++){
headlines.add(cinemablendnews.get(i));
}
} catch (Exception ignored) {
}
}
});
thread2.start();
// thread for kathmanduPost
Thread thread3 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> kathmandupostnews;
kathmandupostnews = kathmanduPost.getNews();
news.addAll(kathmandupostnews);
for(int i=0; i<3; i++){
finalHeadlines.add(kathmandupostnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread3.start();
// thread for globalNews
Thread thread4 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> globalnewsnews;
globalnewsnews = globalNews.getNews();
news.addAll(globalnewsnews);
for(int i=0; i<5; i++){
finalHeadlines.add(globalnewsnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread4.start();
// thread for nepaliTimes
Thread thread5 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> nepalitimesnews;
nepalitimesnews = nepaliTimes.getNews();
news.addAll(nepalitimesnews);
for(int i=0; i<3; i++){
finalHeadlines.add(nepalitimesnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread5.start();
// thread for GoalNepal
Thread thread6 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> goalNepalNews;
goalNepalNews = goalNepal.getNews();
news.addAll(goalNepalNews);
for (int i=0; i<4; i++){
headlines.add(goalNepalNews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread6.start();
// thread for GadgetByteNepal
Thread thread7 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> gadgetbytenews;
gadgetbytenews = gadgetByte.getNews();
news.addAll(gadgetbytenews);
for (int i=0; i<3; i++){
headlines.add(gadgetbytenews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread7.start();
// thread for Techlekh
Thread thread8 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> techlekhnews;
techlekhnews = techLekh.getNews();
news.addAll(techlekhnews);
for (int i=0; i<3; i++){
headlines.add(techlekhnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread8.start();
// thread for onlinekhabar
Thread thread9 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> onlineKhabarnews;
onlineKhabarnews = onlineKhabar.getNews();
news.addAll(onlineKhabarnews);
for (int i=0; i<4; i++){
finalHeadlines.add(onlineKhabarnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread9.start();
//thread for nepalisansar
Thread thread11 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> nepalisansarnews;
nepalisansarnews = nepaliSansar.getNews();
news.addAll(nepalisansarnews);
for (int i=0; i<4; i++){
finalHeadlines.add(nepalisansarnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread11.start();
// thread for cricketingNepal
Thread thread12 = new Thread(new Runnable() {
@Override
public void run() {
try {
ArrayList<NewsItem> cricketnews;
cricketnews = cricketingNepal.getNews();
news.addAll(cricketnews);
for (int i=0; i<4; i++){
headlines.add(cricketnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread12.start();
// main thread wait for each thread to finish
try {
thread.join();
} catch (InterruptedException ignored) {
}
try {
thread1.join();
} catch (InterruptedException ignored) {
}
try {
thread2.join();
} catch (InterruptedException ignored) {
}
try {
thread3.join();
} catch (InterruptedException ignored) {
}
try {
thread4.join();
} catch (InterruptedException ignored) {
}
try {
thread5.join();
} catch (InterruptedException ignored) {
}
try {
thread6.join();
} catch (InterruptedException ignored) {
}
try {
thread7.join();
} catch (InterruptedException ignored) {
}
try {
thread8.join();
} catch (InterruptedException ignored) {
}
try {
thread8.join();
} catch (InterruptedException ignored) {
}
try {
thread9.join();
} catch (InterruptedException ignored) {
}
try {
thread11.join();
} catch (InterruptedException ignored) {
}
try {
thread12.join();
} catch (InterruptedException ignored) {
}
for(NewsItem item : news){
if (item.tag.contains("kathmandu"))
nepal.add(item);
if (item.tag.contains("cricket"))
sports.add(item);
if (item.tag.contains("football"))
sports.add(item);
switch (item.tag) {
case "nepal":
nepal.add(item);
break;
case "world":
world.add(item);
break;
case "sports":
sports.add(item);
break;
case "tech":
tech.add(item);
break;
case "entertainment":
entertainment.add(item);
break;
}
}
// putting each news item to the main container
Collections.shuffle(headlines);
Collections.shuffle(finalHeadlines);
finalHeadlines.addAll(headlines);
Collections.shuffle(nepal);
Collections.shuffle(world);
Collections.shuffle(sports);
Collections.shuffle(tech);
Collections.shuffle(entertainment);
tab1 t1 = new tab1(finalHeadlines);
t1.setRetainInstance(true);
tab2 t2 = new tab2(nepal);
t2.setRetainInstance(true);
tab3 t3 = new tab3(world);
t3.setRetainInstance(true);
tab4 t4 = new tab4(sports);
t4.setRetainInstance(true);
tab5 t5 = new tab5(tech);
t5.setRetainInstance(true);
tab6 t6 = new tab6(entertainment);
t6.setRetainInstance(true);
assert getFragmentManager() != null;
pagerAdapter = new PageAdapter(finalHeadlines,nepal,world,sports,tech,entertainment,getFragmentManager(),tabLayout.getTabCount());
viewPager.setAdapter(pagerAdapter);
shimmerFrameLayout.setVisibility(View.GONE);
}
},new Response.ErrorListener() {
@Override
public void onErrorResponse(VolleyError error) {
Toast.makeText(getContext(),"Internet Connection Error!",Toast.LENGTH_SHORT).show();
shimmerFrameLayout.setVisibility(View.GONE);
tabLayout.setVisibility(View.GONE);
}
});
queue.add(request);
我为每个网站上了课。以下类别之一:-
public class CinemaBlend {
ArrayList<NewsItem> news;
public CinemaBlend() {
news = new ArrayList<>();
}
@RequiresApi(api = Build.VERSION_CODES.KITKAT)
public ArrayList<NewsItem> getNews() throws IOException{
String url = "https://www.cinemablend.com/news.PHP";
OkHttpClient okHttpClient = new OkHttpClient();
Request request = new Request.Builder().url(url).get().build();
Document document = Jsoup.parse(Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string());
Elements articles = document.select("div.order-of-type-2").select("div.story-related").select("a");
for(Element article : articles)
{
String link = article.attr("href");
String title = article.attr("title");
String img = article.select("div.story-related-content").select("span.story-cover-image").select("img").attr("data-src");
String date = article.select("span.story-related-published-date").text();
NewsItem newsItem = new NewsItem();
newsItem.imgsrc = img;
newsItem.title = title;
newsItem.link = link;
newsItem.tag = "entertainment";
newsItem.publisher = "cinemablend.com";
newsItem.source_logo = "https://image.pitchbook.com/WFQVGYL17V0MevlcfQKlWjC3E8K1447542818374_200x200";
if(!date.equals(""))
{
newsItem.date = date + " ago";
news.add(newsItem);
}
}
return news;
}
}
解决方法
查找说明如何执行后台工作的教程。有很多不同的方法可以做到这一点:服务,Kotlin协程,简单的自我管理线程等。
只需远离有关AsyncTasks和Loaders的教程(不建议使用)。
Android开发人员指南是一个很好的起点:https://developer.android.com/guide/background
,但是在异步任务中执行任务会产生另一个类似的问题。当我在后台创建类似的线程时,在跳过数据且ui不会更新的情况下,会出现完全类似的问题。任何建议将不胜感激。
public class DownloadNews extends AsyncTask<Void,Void,Void>
{
@Override
protected void onPreExecute() {
shimmerFrameLayout.setVisibility(View.VISIBLE);
relativeLayout.setVisibility(View.GONE);
tabLayout.setVisibility(View.GONE);
}
@RequiresApi(api = Build.VERSION_CODES.KITKAT)
@Override
protected Void doInBackground(Void... voids) {
tabLayout.setOnTabSelectedListener(new TabLayout.OnTabSelectedListener() {
@Override
public void onTabSelected(TabLayout.Tab tab) {
viewPager.setCurrentItem(tab.getPosition());
}
@Override
public void onTabUnselected(TabLayout.Tab tab) {
}
@Override
public void onTabReselected(TabLayout.Tab tab) {
}
});
viewPager.addOnPageChangeListener(new TabLayout.TabLayoutOnPageChangeListener(tabLayout));
final RequestQueue queue = Volley.newRequestQueue(Objects.requireNonNull(getContext()));
// Checking the connection
final StringRequest request = new StringRequest("https://www.google.com/",new Response.Listener<String>() {
@Override
public void onResponse(String response) {
// instances for each required website
final HimalayanTimes himalayanTimes = new HimalayanTimes(getContext());
final GsmArena gsmArena = new GsmArena();
final CinemaBlend cinemaBlend = new CinemaBlend();
final KathmanduPost kathmanduPost = new KathmanduPost(getContext());
final GlobalNews globalNews = new GlobalNews();
final NepaliTimes nepaliTimes = new NepaliTimes(getContext());
final GoalNepal goalNepal = new GoalNepal(getContext());
final GadgetByte gadgetByte = new GadgetByte();
final TechLekh techLekh = new TechLekh();
final OnlineKhabar onlineKhabar = new OnlineKhabar();
final NepaliSansar nepaliSansar = new NepaliSansar();
final CricketingNepal cricketingNepal = new CricketingNepal();
// thread for each website
// thread fot thehimalayantimes
Thread thread = new Thread(new Runnable() {
@Override
public void run() {
try {
himalyannews = himalayanTimes.getNews();
news.addAll(himalyannews);
for(int i=0; i<4; i++){
finalHeadlines.add(himalyannews.get(i));
}
} catch (Exception ignored) {
}
}
});
thread.start();
// thread for gsmArena
Thread thread1 = new Thread(new Runnable() {
@Override
public void run() {
try {
gsmarenanews = gsmArena.getNews();
news.addAll(gsmarenanews);
for(int i=0; i<3; i++){
headlines.add(gsmarenanews.get(i));
}
} catch (Exception ignored) {
}
}
});
thread1.start();
// thread for cinemaBlend
Thread thread2 = new Thread(new Runnable() {
@RequiresApi(api = Build.VERSION_CODES.KITKAT)
@Override
public void run() {
try {
cinemablendnews = cinemaBlend.getNews();
news.addAll(cinemablendnews);
for(int i=0; i<4; i++){
headlines.add(cinemablendnews.get(i));
}
} catch (Exception ignored) {
}
}
});
thread2.start();
// thread for kathmanduPost
Thread thread3 = new Thread(new Runnable() {
@RequiresApi(api = Build.VERSION_CODES.KITKAT)
@Override
public void run() {
try {
kathmandupostnews = kathmanduPost.getNews();
news.addAll(kathmandupostnews);
for(int i=0; i<3; i++){
finalHeadlines.add(kathmandupostnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread3.start();
// thread for globalNews
Thread thread4 = new Thread(new Runnable() {
@Override
public void run() {
try {
globalnewsnews = globalNews.getNews();
news.addAll(globalnewsnews);
for(int i=0; i<5; i++){
finalHeadlines.add(globalnewsnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread4.start();
// thread for nepaliTimes
Thread thread5 = new Thread(new Runnable() {
@Override
public void run() {
try {
nepalitimesnews = nepaliTimes.getNews();
news.addAll(nepalitimesnews);
for(int i=0; i<3; i++){
finalHeadlines.add(nepalitimesnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread5.start();
// thread for GoalNepal
Thread thread6 = new Thread(new Runnable() {
@Override
public void run() {
try {
goalNepalNews = goalNepal.getNews();
news.addAll(goalNepalNews);
for (int i=0; i<4; i++){
headlines.add(goalNepalNews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread6.start();
// thread for GadgetByteNepal
Thread thread7 = new Thread(new Runnable() {
@Override
public void run() {
try {
gadgetbytenews = gadgetByte.getNews();
news.addAll(gadgetbytenews);
for (int i=0; i<3; i++){
headlines.add(gadgetbytenews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread7.start();
// thread for Techlekh
Thread thread8 = new Thread(new Runnable() {
@Override
public void run() {
try {
techlekhnews = techLekh.getNews();
news.addAll(techlekhnews);
for (int i=0; i<3; i++){
headlines.add(techlekhnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread8.start();
// thread for onlinekhabar
Thread thread9 = new Thread(new Runnable() {
@Override
public void run() {
try {
onlineKhabarnews = onlineKhabar.getNews();
news.addAll(onlineKhabarnews);
for (int i=0; i<4; i++){
finalHeadlines.add(onlineKhabarnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread9.start();
//thread for nepalisansar
Thread thread11 = new Thread(new Runnable() {
@Override
public void run() {
try {
nepalisansarnews = nepaliSansar.getNews();
news.addAll(nepalisansarnews);
for (int i=0; i<4; i++){
finalHeadlines.add(nepalisansarnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread11.start();
// thread for cricketingNepal
Thread thread12 = new Thread(new Runnable() {
@Override
public void run() {
try {
cricketnews = cricketingNepal.getNews();
news.addAll(cricketnews);
for (int i=0; i<4; i++){
headlines.add(cricketnews.get(i));
}
} catch (IOException ignored) {
}
}
});
thread12.start();
}
},new Response.ErrorListener() {
@Override
public void onErrorResponse(VolleyError error) {
Toast.makeText(getContext(),"Internet Connection Error!",Toast.LENGTH_SHORT).show();
shimmerFrameLayout.setVisibility(View.GONE);
relativeLayout.setVisibility(View.VISIBLE);
tabLayout.setVisibility(View.GONE);
}
});
queue.add(request);
return null;
}
@Override
protected void onPostExecute(Void aVoid) {
tabLayout.setVisibility(View.VISIBLE);
shimmerFrameLayout.setVisibility(View.GONE);
for(NewsItem item : news){
if (item.tag.contains("kathmandu"))
nepal.add(item);
switch (item.tag) {
case "nepal":
nepal.add(item);
break;
case "world":
world.add(item);
break;
case "sports":
sports.add(item);
break;
case "tech":
tech.add(item);
break;
case "entertainment":
entertainment.add(item);
break;
}
}
// putting each news item to the main container
Collections.shuffle(headlines);
Collections.shuffle(finalHeadlines);
finalHeadlines.addAll(headlines);
Collections.shuffle(nepal);
Collections.shuffle(world);
Collections.shuffle(sports);
Collections.shuffle(tech);
Collections.shuffle(entertainment);
tab1 t1 = new tab1(finalHeadlines);
t1.setRetainInstance(true);
tab2 t2 = new tab2(nepal);
t2.setRetainInstance(true);
tab3 t3 = new tab3(world);
t3.setRetainInstance(true);
tab4 t4 = new tab4(sports);
t4.setRetainInstance(true);
tab5 t5 = new tab5(tech);
t5.setRetainInstance(true);
tab6 t6 = new tab6(entertainment);
t6.setRetainInstance(true);
shimmerFrameLayout.setVisibility(View.GONE);
assert getFragmentManager() != null;
pagerAdapter = new PageAdapter(finalHeadlines,nepal,world,sports,tech,entertainment,getFragmentManager(),tabLayout.getTabCount());
viewPager.setAdapter(pagerAdapter);
}
}
适配器的代码与上面的代码完全相似。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。