How to fix incorrect data serialization with the TensorFlow TFRecordWriter
I have a dataset created with MATLAB's ImageLabeller. When I try to convert it to a TFRecord following the instructions here, some of the coordinates come out wrong: it looks as if the minimum values are larger than the maximums. I tried deleting the failing examples, but the error does not seem to be tied to them, and the failing examples always show up at the same positions. A dataset built from MODD2 images and another one created with the ImageLabeller on larger images both work fine.
# MODD2 format: x y w h -> x,y are the top left corner coordinates
def read_drone_mat_file(file_number):
    # navigate to the modd2 directory
    bBox_d = []
    bBox_o = []
    filename = []
    # for each file, load it into data and append the obstacles information into the bBox list
    mat = os.listdir(drones_dir)[file_number]
    frame = os.path.join(drones_dir, mat)
    data = sio.loadmat(frame)
    for obj in data['drone']:
        bBox_d.append(obj)
    for obj in data['obstacles']:
        bBox_o.append(obj)
    filename.append(mat[0:9])
    return bBox_d, bBox_o, filename
# %% Helper function to create a tfexample for the drone data
def create_drone_tfexample(drones, obstacles, index, image_path):
    image_format = b'jpg'
    filename = os.listdir(image_path)[index+2]
    # load corresponding image (only use left images)
    with tf.io.gfile.GFile(os.path.join(image_path, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
    wsize, hsize = (width, height)
    # basewidth = 640
    # if width > basewidth:
    #     wpercent = (basewidth/float(image.size[0]))
    #     hsize = int((float(image.size[1])*float(wpercent)))
    #     wsize = basewidth
    #     image = image.resize((basewidth, hsize), Image.ANTIALIAS)
    #     buffered = io.BytesIO()
    #     image.save(buffered, format="JPEG")
    #     encoded_jpg = buffered.getvalue()
    filename = os.path.splitext(filename)[0].encode('utf-8')
    create_drone_tfexample.source_id += 1
    source_id_s = "{}".format(create_drone_tfexample.source_id).encode('utf-8')
    # tfrecord features definition
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    # for each object in the image
    for obj in drones:
        xmins.append(obj[0] / width)
        xmaxs.append((obj[0]+obj[2]) / width)
        ymins.append(obj[1] / height)
        ymaxs.append((obj[1]+obj[3]) / height)
        classes_text.append(bytes('drone', 'utf-8'))
        classes.append(2)
    for obj in obstacles:
        xmins.append(obj[0] / width)
        xmaxs.append((obj[0]+obj[2]) / width)
        ymins.append(obj[1] / height)
        ymaxs.append((obj[1]+obj[3]) / height)
        # until the drone dataset is available, all obstacles are class 0
        classes_text.append(bytes('obstacles', 'utf-8'))
        classes.append(1)
    print(source_id_s + b": " + filename)
    # print("xmins: {}".format(xmins))
    # print("xmaxs: {}".format(xmaxs))
    # print("ymins: {}".format(ymins))
    # print("ymaxs: {}".format(ymaxs))
    # create tf_example
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(hsize),
        'image/width': dataset_util.int64_feature(wsize),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(source_id_s),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bBox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bBox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bBox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bBox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

create_drone_tfexample.source_id = 0
# %% Create final dataset WARNING: Slow and destructive
train_writer = tf.io.TFRecordWriter(
    output_dir + 'drone_train_truncated.tfrecord')
test_writer = tf.io.TFRecordWriter(output_dir + 'drone_test_truncated.tfrecord')
drone_test_writer = tf.io.TFRecordWriter(
    output_dir + 'drone_only_test.tfrecord')
create_drone_tfexample.source_id = 0

# Drones dataset
for index, mat in enumerate(os.listdir(drones_dir)):
    Boxes_d, Boxes_o, filename = read_drone_mat_file(index)
    print()
    # Pass the bounding Boxes to the create_tfexample function
    if index < 210:
        image_path = drones_image_root
        tf_example = create_drone_tfexample(
            Boxes_d, Boxes_o, index, image_path)
        # Write the tf_example into the dataset
        if random.randint(1, 100) <= 80:  # 80% Train 20% Validation
            train_writer.write(tf_example.SerializeToString())
        else:
            test_writer.write(tf_example.SerializeToString())
        drone_test_writer.write(tf_example.SerializeToString())
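For reference, a quick validity check on the normalized boxes right before the writer.write() calls would make this kind of corruption visible at conversion time instead of at training time. The helper below is only an illustrative sketch (boxes_look_valid is not part of the original script):

# Illustrative helper (not in the original script): returns False if any
# normalized box has min >= max or falls outside the [0, 1] range.
def boxes_look_valid(xmins, xmaxs, ymins, ymaxs):
    for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs):
        if not (0.0 <= xmin < xmax <= 1.0 and 0.0 <= ymin < ymax <= 1.0):
            return False
    return True

Calling it on the four lists built inside create_drone_tfexample and printing the filename whenever it returns False points straight at the broken examples.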
The examples fail when I try to train on them. To read the examples back, I use the following code:
# %% Extract images from dataset
dataset_file = "drone_only_test.tfrecord"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
raw_dataset = tf.data.TFRecordDataset(
    "<path_to_dataset>" + dataset_file)
print('_______________________________________________________________________________________')
image_feature_description = {
    'image/height': tf.io.FixedLenFeature([], tf.int64),
    'image/width': tf.io.FixedLenFeature([], tf.int64),
    'image/filename': tf.io.FixedLenFeature([], tf.string),
    'image/source_id': tf.io.FixedLenFeature([], tf.string),
    'image/encoded': tf.io.FixedLenFeature([], tf.string),
    'image/format': tf.io.FixedLenFeature([], tf.string),
    'image/object/bBox/xmin': tf.io.VarLenFeature(tf.float32),
    'image/object/bBox/xmax': tf.io.VarLenFeature(tf.float32),
    'image/object/bBox/ymin': tf.io.VarLenFeature(tf.float32),
    'image/object/bBox/ymax': tf.io.VarLenFeature(tf.float32),
    'image/object/class/text': tf.io.VarLenFeature(tf.string),
    'image/object/class/label': tf.io.VarLenFeature(tf.int64),
}
def _parse_image_function(example_proto):
    # Parse the input tf.train.Example proto using the dictionary above.
    return tf.io.parse_single_example(example_proto, image_feature_description)

parsed_image_dataset = raw_dataset.map(_parse_image_function)

for image_features in parsed_image_dataset.take(10):
    image_raw = image_features['image/encoded'].numpy()
    display.display(display.Image(data=image_raw))
    encoded_jpg_io = io.BytesIO(image_raw)
    image = Image.open(encoded_jpg_io)
    image.save("out.jpg", format="JPEG")
    print(f'ID: {image_features["image/filename"]}')
    print(f'XMIN: {image_features["image/object/bBox/xmin"].values*640}')
    print(f'XMAX: {image_features["image/object/bBox/xmax"].values*640}')
    print(f'YMIN: {image_features["image/object/bBox/ymin"].values*480}')
    print(f'YMAX: {image_features["image/object/bBox/ymax"].values*480}')
    print('---------------------')
    print(
        f'WIDTH: {image_features["image/object/bBox/xmax"].values*640 - image_features["image/object/bBox/xmin"].values*640}')
    print(
        f'HEIGHT: {image_features["image/object/bBox/ymax"].values*480 - image_features["image/object/bBox/ymin"].values*480}')
For the example at the fourth position the output is as follows:
ID: b'color_00000036'
XMIN: [179. 175. 5.]
XMAX: [387. 210. 21.]
YMIN: [263. 193. 242.]
YMAX: [372. 6. 248.]
---------------------
WIDTH: [208. 35. 16.]
HEIGHT: [ 109. -187. 6.]
The MATLAB output for the same image is:
ground_truth =
179 175 5
263 193 242
208 35 16
109 69 6
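The negative height (-187) and the YMAX of 6 for the second object are exactly what an 8-bit wrap-around would produce: 193 + 69 = 262, which becomes 262 mod 256 = 6 when the sum is computed in np.uint8 (the dtype identified in the solution below). A minimal reproduction, using the values from the ground truth above:

import numpy as np

# ymin and height of the second object, as stored in the .mat file
y = np.array([193], dtype=np.uint8)
h = np.array([69], dtype=np.uint8)
print(y + h)                      # [6]   -> uint8 arithmetic wraps modulo 256
print(y.astype(np.float64) + h)   # [262.] once the values are promoted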
The versions used are:
- Windows 10 64-bit
- Python 3.7.9 64-bit
- TensorFlow 2.4.0
- SciPy 1.5.4
- TensorFlow Object Detection API (master branch)
Solution
The problem turned out to be related to scipy.io.loadmat() converting the data to np.uint8. The solution was to pass mat_dtype=True so that everything is loaded as np.float64. Not the most efficient approach, but it works.
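In code, this means loading the .mat file with mat_dtype=True (or explicitly promoting the arrays before doing any arithmetic), so the obj[0]+obj[2] and obj[1]+obj[3] sums in create_drone_tfexample can no longer overflow. A minimal sketch of the adjusted loader, assuming the same directory layout as the original read_drone_mat_file (the _fixed name and the explicit drones_dir parameter are illustrative):

import os
import scipy.io as sio

def read_drone_mat_file_fixed(drones_dir, file_number):
    # mat_dtype=True makes loadmat return arrays in the dtype MATLAB itself
    # would load them with (double precision), instead of the narrow dtype
    # they were saved with, so the box sums cannot wrap around any more.
    mat = os.listdir(drones_dir)[file_number]
    data = sio.loadmat(os.path.join(drones_dir, mat), mat_dtype=True)
    return list(data['drone']), list(data['obstacles']), [mat[0:9]]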
Many thanks.