如何解决将字节流解析成大对象
我有一个字节流输入(大约 100MB)。我需要将该字节流解析成一个大数据对象,其中包含 200 万个数据项对象(每个大小约为 50 字节)。
每个数据项都有若干成员,如 int、short 和其他对象。我已经尝试使用 DataInputStream
循环两百万次来解决这个问题,但需要好几秒钟。能否在一秒钟内完成?
这是示例:
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * One decoded record: four nested part objects plus two plain {@code int} values.
 * Every field is private and is assigned only through the setters declared here.
 */
class DataItem {
    private Part0 member0;
    private Part1 member1;
    private Part3 member3;
    private Part4 member4;
    private int member5;
    private int member6;

    /** Stores the {@link Part0} component of this item. */
    public void setMember0(Part0 part) {
        member0 = part;
    }

    /** Stores the {@link Part1} component of this item. */
    public void setMember1(Part1 part) {
        member1 = part;
    }

    /** Stores the {@link Part3} component of this item. */
    public void setMember3(Part3 part) {
        member3 = part;
    }

    /** Stores the {@link Part4} component of this item. */
    public void setMember4(Part4 part) {
        member4 = part;
    }

    /** Stores the first of the two {@code int} values. */
    public void setMember5(int value) {
        member5 = value;
    }

    /** Stores the second of the two {@code int} values. */
    public void setMember6(int value) {
        member6 = value;
    }
}
/**
 * Pairs a {@link Part2} object with a {@link String}.
 * Both fields are package-private and can also be assigned through the setters.
 */
class Part0 {
    Part2 member1;
    String member2;

    /** Stores the {@link Part2} component. */
    public void setMember1(Part2 part) {
        member1 = part;
    }

    /** Stores the text component. */
    public void setMember2(String text) {
        member2 = text;
    }
}
/**
 * Holder for one {@code short} followed by six {@code byte} values.
 * Fields are package-private; each has a matching setter.
 */
class Part1 {
    short member1;
    byte member2;
    byte member3;
    byte member4;
    byte member5;
    byte member6;
    byte member7;

    /** Stores the {@code short} value. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores the first {@code byte} value. */
    public void setMember2(byte value) {
        member2 = value;
    }

    /** Stores the second {@code byte} value. */
    public void setMember3(byte value) {
        member3 = value;
    }

    /** Stores the third {@code byte} value. */
    public void setMember4(byte value) {
        member4 = value;
    }

    /** Stores the fourth {@code byte} value. */
    public void setMember5(byte value) {
        member5 = value;
    }

    /** Stores the fifth {@code byte} value. */
    public void setMember6(byte value) {
        member6 = value;
    }

    /** Stores the sixth {@code byte} value. */
    public void setMember7(byte value) {
        member7 = value;
    }
}
/**
 * Holder for three {@code short} values, one {@code int} and two {@code byte} values.
 * Fields are package-private; each has a matching setter.
 */
class Part2 {
    short member1;
    short member2;
    int member3;
    byte member4;
    byte member5;
    short member6;

    /** Stores {@code member1}. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores {@code member2}. */
    public void setMember2(short value) {
        member2 = value;
    }

    /** Stores {@code member3}. */
    public void setMember3(int value) {
        member3 = value;
    }

    /** Stores {@code member4}. */
    public void setMember4(byte value) {
        member4 = value;
    }

    /** Stores {@code member5}. */
    public void setMember5(byte value) {
        member5 = value;
    }

    /** Stores {@code member6}. */
    public void setMember6(short value) {
        member6 = value;
    }
}
/**
 * Holder for two {@code short} values.
 * Fields are package-private; each has a matching setter.
 */
class Part3 {
    short member1;
    short member2;

    /** Stores {@code member1}. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores {@code member2}. */
    public void setMember2(short value) {
        member2 = value;
    }
}
/**
 * Holder for one {@code int} followed by two {@code short} values.
 * Fields are package-private; each has a matching setter.
 */
class Part4 {
    int member1;
    short member2;
    short member3;

    /** Stores {@code member1}. */
    public void setMember1(int member) {
        this.member1 = member;
    }

    /** Stores {@code member2}. */
    public void setMember2(short member) {
        this.member2 = member;
    }

    /**
     * Misspelled alias of {@link #setMember2(short)}, kept so that existing callers
     * of the old name continue to compile. New code should call {@code setMember2}.
     */
    public void setzMember2(short member) {
        setMember2(member);
    }

    /** Stores {@code member3}. */
    public void setMember3(short member) {
        this.member3 = member;
    }
}
/**
 * Micro-benchmark: loads the test file into memory and times how long it takes to
 * decode two million 50-byte records into {@link DataItem} objects, first through a
 * {@link DataInputStream} and then through a {@link BufferedInputStream}.
 */
public class testForHugeData {

    /**
     * Creates the test file, reads it fully into a byte array and runs both timed passes.
     *
     * @param args unused
     * @throws IOException if the test file cannot be created or read, or if it holds
     *                     fewer bytes than a pass consumes
     */
    public static void main(String[] args) throws IOException {
        int runtimes = 2000000;
        createFile();

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        BufferedInputStream ws = new BufferedInputStream(new FileInputStream("test.txt"));
        try {
            byte[] buffer = new byte[1024];
            int len;
            // The bulk read/write overloads take (buffer, offset, length); there is no
            // two-argument (buffer, length) form on these stream classes.
            while ((len = ws.read(buffer, 0, buffer.length)) != -1) {
                bos.write(buffer, 0, len);
            }
        } finally {
            ws.close();
        }
        byte[] arr = bos.toByteArray();
        System.out.println("a input byteStream sized "+arr.length +" is created");

        // Each pass gets its own stream over the array. One pass consumes
        // 2,000,000 records x 50 bytes = 100,000,000 bytes, i.e. the whole default
        // test file, so a second stream layered on top of the first one would start
        // at end-of-data and measure nothing but failed reads.
        DataInputStream ds = new DataInputStream(new ByteArrayInputStream(arr));
        BufferedInputStream fs = new BufferedInputStream(new ByteArrayInputStream(arr));
        runTaskForManyTimes(runtimes, ds, fs);
    }

    /**
     * Decodes {@code runtimes} records from each stream in turn and prints the elapsed
     * wall-clock time of each pass.
     *
     * @param runtimes number of records to decode per pass
     * @param ds       source for the {@link DataInputStream} pass
     * @param fs       source for the {@link BufferedInputStream} pass
     * @throws IOException if either stream fails or runs out of data
     */
    private static void runTaskForManyTimes(int runtimes, DataInputStream ds, BufferedInputStream fs) throws IOException {
        // NOTE(review): HageData is not declared in the visible source; it is assumed
        // to be provided elsewhere with an addDataItems(DataItem) method — confirm.
        HageData hugeData = new HageData();
        long start = System.currentTimeMillis();
        for (int i = 0; i < runtimes; i++) {
            hugeData.addDataItems(taskUseDataInputStream(runtimes, ds));
        }
        System.out.println("use dataIuputStream to analyze byte stream:");
        System.out.println(" it takes "+(System.currentTimeMillis()-start)+"ms to loop 2 million times");

        HageData hugeData1 = new HageData();
        start = System.currentTimeMillis();
        for (int i = 0; i < runtimes; i++) {
            hugeData1.addDataItems(taskUseBufferedInputStream(runtimes, fs));
        }
        System.out.println("use bufferedIuputStream to analyze byte stream:");
        System.out.println(" it takes "+(System.currentTimeMillis()-start)+"ms to loop 2 million times");
    }

    /**
     * Decodes one 50-byte record from a {@link DataInputStream}.
     *
     * @param runtimes unused; kept so both decoders share the same parameter list
     * @param ds       stream positioned at the start of a record
     * @return the decoded item
     * @throws IOException if the stream fails or ends before the record is complete
     */
    private static DataItem taskUseDataInputStream(int runtimes, DataInputStream ds) throws IOException {
        DataItem item = new DataItem();

        Part1 part1 = new Part1();
        part1.setMember1(ds.readShort());
        part1.setMember2(ds.readByte());
        part1.setMember3(ds.readByte());
        part1.setMember4(ds.readByte());
        part1.setMember5(ds.readByte());
        part1.setMember6(ds.readByte());
        part1.setMember7(ds.readByte());
        item.setMember1(part1);

        Part0 part0 = new Part0();
        Part2 part2 = new Part2();
        // The read order below is the on-stream field order; it does not follow the
        // member numbering.
        part2.setMember1(ds.readShort());
        part2.setMember3(ds.readInt());
        part2.setMember5(ds.readByte());
        part2.setMember2(ds.readShort());
        part2.setMember6(ds.readShort());
        part2.setMember4(ds.readByte());
        byte[] tmp = new byte[10];
        ds.readFully(tmp);
        part0.setMember1(part2);
        // Decoded with the platform default charset, as before.
        part0.setMember2(new String(tmp));
        item.setMember0(part0);

        Part3 part3 = new Part3();
        part3.setMember1(ds.readShort());
        part3.setMember2(ds.readShort());
        item.setMember3(part3);

        Part4 part4 = new Part4();
        part4.setMember1(ds.readInt());
        part4.setzMember2(ds.readShort());
        part4.setMember3(ds.readShort());
        item.setMember4(part4);

        item.setMember5(ds.readInt());
        item.setMember6(ds.readInt());
        return item;
    }

    /**
     * Decodes one 50-byte record from a {@link BufferedInputStream}, reading the same
     * fields in the same order as {@link #taskUseDataInputStream}.
     *
     * @param runtimes unused; kept so both decoders share the same parameter list
     * @param fs       stream positioned at the start of a record
     * @return the decoded item
     * @throws IOException if the stream fails or ends before the record is complete
     */
    private static DataItem taskUseBufferedInputStream(int runtimes, BufferedInputStream fs) throws IOException {
        DataItem item = new DataItem();

        Part1 part1 = new Part1();
        part1.setMember1(readShort(fs));
        part1.setMember2(readByte(fs));
        part1.setMember3(readByte(fs));
        part1.setMember4(readByte(fs));
        part1.setMember5(readByte(fs));
        part1.setMember6(readByte(fs));
        part1.setMember7(readByte(fs));
        item.setMember1(part1);

        Part0 part0 = new Part0();
        Part2 part2 = new Part2();
        part2.setMember1(readShort(fs));
        part2.setMember3(readInt(fs));
        part2.setMember5(readByte(fs));
        part2.setMember2(readShort(fs));
        part2.setMember6(readShort(fs));
        part2.setMember4(readByte(fs));
        byte[] tmp = new byte[10];
        for (int i = 0; i < tmp.length; i++) {
            tmp[i] = readByte(fs);
        }
        part0.setMember1(part2);
        // Decoded with the platform default charset, as before.
        part0.setMember2(new String(tmp));
        item.setMember0(part0);

        Part3 part3 = new Part3();
        part3.setMember1(readShort(fs));
        part3.setMember2(readShort(fs));
        item.setMember3(part3);

        Part4 part4 = new Part4();
        part4.setMember1(readInt(fs));
        part4.setzMember2(readShort(fs));
        part4.setMember3(readShort(fs));
        item.setMember4(part4);

        item.setMember5(readInt(fs));
        item.setMember6(readInt(fs));
        return item;
    }

    /**
     * Reads one byte as an unsigned value in the range 0..255.
     *
     * @throws EOFException if the stream is already at its end
     * @throws IOException  if the underlying read fails
     */
    private static int readUnsignedByte(BufferedInputStream fs) throws IOException {
        int value = fs.read();
        if (value < 0) {
            // read() signals end of stream with -1; treating that as data would
            // silently fabricate 0xFF bytes.
            throw new EOFException("unexpected end of stream");
        }
        return value;
    }

    /** Reads one signed byte, failing with {@link EOFException} at end of stream. */
    private static byte readByte(BufferedInputStream fs) throws IOException {
        return (byte) readUnsignedByte(fs);
    }

    /**
     * Reads a big-endian 16-bit signed value.
     *
     * @throws EOFException if fewer than two bytes remain
     * @throws IOException  if the underlying read fails
     */
    private static short readShort(BufferedInputStream fs) throws IOException {
        // Bytes are combined as unsigned values so that a low byte >= 0x80 does not
        // sign-extend and overwrite the high byte.
        int high = readUnsignedByte(fs);
        int low = readUnsignedByte(fs);
        return (short) ((high << 8) | low);
    }

    /**
     * Reads a big-endian 32-bit signed value.
     *
     * @throws EOFException if fewer than four bytes remain
     * @throws IOException  if the underlying read fails
     */
    private static int readInt(BufferedInputStream fs) throws IOException {
        int b0 = readUnsignedByte(fs);
        int b1 = readUnsignedByte(fs);
        int b2 = readUnsignedByte(fs);
        int b3 = readUnsignedByte(fs);
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    }

    /**
     * Ensures {@code test.txt} exists and sets its length to 100,000,000 bytes.
     *
     * @throws IOException if the file cannot be opened or resized
     */
    private static void createFile() throws IOException {
        File file = new File("test.txt");
        // Opening in "rw" mode creates the file when it does not exist yet, so no
        // separate createNewFile() step is required.
        RandomAccessFile file1 = new RandomAccessFile(file, "rw");
        try {
            file1.setLength(100000000); //you can change size here
        } finally {
            file1.close();
        }
    }
}
结果如下:
a input byteStream sized 100000000 is created
use dataIuputStream to analyze byte stream:
it takes 4489ms to loop 2 million times
use bufferedIuputStream to analyze byte stream:
it takes 4686ms to loop 2 million times
所以看起来 BufferedInputStream 更慢?但是当我将输入字节流的大小更改为 400M(通过将测试文件大小更改为 400M)时,结果是:
a input byteStream sized 400000000 is created
use dataIuputStream to analyze byte stream:
it takes 4740ms to loop 2 million times
use bufferedIuputStream to analyze byte stream:
it takes 1384ms to loop 2 million times
因此,BufferedInputStream 的性能似乎取决于缓冲区大小。反正时间成本太高了。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。