TensorFlow TFWriter incorrect data serialization

How to fix incorrect data serialization with the TensorFlow TFWriter

I have a dataset created with MATLAB's ImageLabeller. When I try to convert it to a TFRecord following the instructions here, some of the coordinates come out wrong: it looks as if the minimum value is larger than the maximum. I tried deleting the failing examples, but the error does not seem to be tied to them; the failures always appear at the same positions. I also tried a dataset created with ImageLabeller from the MODD2 images, which are larger, and that one works correctly.

The code used to generate the TFRecord file is the following:

# Imports assumed by the snippets below; drones_dir, drones_image_root and
# output_dir are configuration paths defined elsewhere in the script.
import io
import os
import random

import scipy.io as sio
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util  # TF Object Detection API

# MODD2 format: x y w h -> x,y are the top left corner coordinates
def read_drone_mat_file(file_number):

    # navigate to the modd2 directory
    bBox_d = []
    bBox_o = []
    filename = []

    # load the selected file into data and append the drone / obstacle information into the bBox lists
    mat = os.listdir(drones_dir)[file_number]
    frame = os.path.join(drones_dir,mat)
    data = sio.loadmat(frame)

    for obj in data['drone']:
        bBox_d.append(obj)

    for obj in data['obstacles']:
        bBox_o.append(obj)
    filename.append(mat[0:9])

    return bBox_d,bBox_o,filename

# %% Helper function to create a tfexample for the drone data
def create_drone_tfexample(drones,obstacles,index,image_path):

    image_format = b'jpg'
    filename = os.listdir(image_path)[index+2]

    # load corresponding image (only use left images)
    with tf.io.gfile.GFile(os.path.join(image_path,filename),'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width,height = image.size

    wsize,hsize = (width,height)
    #basewidth = 640
    # if width > basewidth:
    #     wpercent = (basewidth/float(image.size[0]))
    #     hsize = int((float(image.size[1])*float(wpercent)))
    #     wsize = basewidth
    #     image = image.resize((basewidth,hsize),Image.ANTIALIAS)
    #     buffered = io.BytesIO()
    #     image.save(buffered,format="JPEG")
    #     encoded_jpg = buffered.getvalue()

    filename = os.path.splitext(filename)[0].encode('utf-8')
    create_drone_tfexample.source_id += 1
    source_id_s = "{}".format(create_drone_tfexample.source_id).encode('utf-8')

    # tfrecord features definition
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # for each image
    for obj in drones:
        xmins.append(obj[0] / width)
        xmaxs.append((obj[0]+obj[2]) / width)
        ymins.append(obj[1] / height)
        ymaxs.append((obj[1]+obj[3]) / height)
        # until the drone dataset is available,all obstacles are class 0
        classes_text.append(bytes('drone','utf-8'))
        classes.append(2)

    for obj in obstacles:
        xmins.append(obj[0] / width)
        xmaxs.append((obj[0]+obj[2]) / width)
        ymins.append(obj[1] / height)
        ymaxs.append((obj[1]+obj[3]) / height)
        # until the drone dataset is available,all obstacles are class 0
        classes_text.append(bytes('obstacles','utf-8'))
        classes.append(1)

    print(source_id_s+b": "+filename)
    # print("xmins: {}".format(xmins))
    # print("xmaxs: {}".format(xmaxs))
    # print("ymins: {}".format(ymins))
    # print("ymaxs: {}".format(ymaxs))

    # create tf_example
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(hsize),
        'image/width': dataset_util.int64_feature(wsize),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(source_id_s),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bBox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bBox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bBox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bBox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example


create_drone_tfexample.source_id = 0

# %% Create final dataset WARNING: Slow and destructive
train_writer = tf.io.TFRecordWriter(
    output_dir+'drone_train_truncated.tfrecord')
test_writer = tf.io.TFRecordWriter(output_dir+'drone_test_truncated.tfrecord')
drone_test_writer = tf.io.TFRecordWriter(
    output_dir + 'drone_only_test.tfrecord')
create_drone_tfexample.source_id = 0

# Drones dataset
for index,mat in enumerate(os.listdir(drones_dir)):
    Boxes_d,Boxes_o,filename = read_drone_mat_file(index)
    print()
    # Pass the bounding Boxes to the create_tfexample function
    if index < 210:
        image_path = drones_image_root
        tf_example = create_drone_tfexample(
            Boxes_d,Boxes_o,index,image_path)

    # Write the tf_example into the dataset
    if random.randint(1,100) <= 80:  # 80% Train  20% Validation
        train_writer.write(tf_example.SerializeToString())
    else:
        test_writer.write(tf_example.SerializeToString())
        drone_test_writer.write(tf_example.SerializeToString())
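
A side note on the writer loop above (unrelated to the coordinate problem): tf.io.TFRecordWriter buffers its output, so the writers should be closed, or used as context managers, once the loop is done so that all serialized examples are flushed to disk. A minimal sketch using the writer names from the snippet:

# Flush and close the record writers once all examples have been written
train_writer.close()
test_writer.close()
drone_test_writer.close()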

The examples fail when I try to train with them. To read the examples back I use the following code:

# %% Extract images from dataset
# Imports assumed by this snippet
import io
import os

import tensorflow as tf
from IPython import display
from PIL import Image

dataset_file = "drone_only_test.tfrecord"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
raw_dataset = tf.data.TFRecordDataset(
    "<path_to_dataset>"+dataset_file)

print('_______________________________________________________________________________________')
image_feature_description = {
    # same keys and types as used when writing the records above
    'image/height': tf.io.FixedLenFeature([], tf.int64),
    'image/width': tf.io.FixedLenFeature([], tf.int64),
    'image/filename': tf.io.FixedLenFeature([], tf.string),
    'image/source_id': tf.io.FixedLenFeature([], tf.string),
    'image/encoded': tf.io.FixedLenFeature([], tf.string),
    'image/format': tf.io.FixedLenFeature([], tf.string),
    'image/object/bBox/xmin': tf.io.VarLenFeature(tf.float32),
    'image/object/bBox/xmax': tf.io.VarLenFeature(tf.float32),
    'image/object/bBox/ymin': tf.io.VarLenFeature(tf.float32),
    'image/object/bBox/ymax': tf.io.VarLenFeature(tf.float32),
    'image/object/class/text': tf.io.VarLenFeature(tf.string),
    'image/object/class/label': tf.io.VarLenFeature(tf.int64),
}


def _parse_image_function(example_proto):
    # Parse the input tf.train.Example proto using the dictionary above.
    return tf.io.parse_single_example(example_proto,image_feature_description)


parsed_image_dataset = raw_dataset.map(_parse_image_function)

for image_features in parsed_image_dataset.take(10):
    image_raw = image_features['image/encoded'].numpy()
    display.display(display.Image(data=image_raw))
    encoded_jpg_io = io.BytesIO(image_raw)
    image = Image.open(encoded_jpg_io)
    image.save("out.jpg",format="JPEG")
    print(f'ID: {image_features["image/filename"]}')
    print(f'XMIN: {image_features["image/object/bBox/xmin"].values*640}')
    print(f'XMAX: {image_features["image/object/bBox/xmax"].values*640}')
    print(f'YMIN: {image_features["image/object/bBox/ymin"].values*480}')
    print(f'YMAX: {image_features["image/object/bBox/ymax"].values*480}')
    print('---------------------')
    print(
        f'WIDTH: {image_features["image/object/bBox/xmax"].values*640 - image_features["image/object/bBox/xmin"].values*640}')
    print(
        f'HEIGHT: {image_features["image/object/bBox/ymax"].values*480 - image_features["image/object/bBox/ymin"].values*480}')

For the example at the fourth position the output is as follows:

ID: b'color_00000036'
XMIN: [179. 175.   5.]
XMAX: [387. 210.  21.]
YMIN: [263. 193. 242.]
YMAX: [372.   6. 248.]
---------------------
WIDTH: [208.  35.  16.]
HEIGHT: [ 109. -187.    6.]

The MATLAB output for the same image is as follows:

ground_truth =

   179   175     5
   263   193   242
   208    35    16
   109    69     6

The versions used are as follows:

  • Windows 10 64-bit
  • Python 3.7.9 64-bit
  • TensorFlow 2.4.0
  • SciPy 1.5.4
  • TensorFlow Object Detection API (master)

Solution

The problem turned out to be related to scipy.io.loadmat() loading the data as np.uint8: the x+w and y+h sums then overflow, e.g. for the second object above 193 + 69 = 262 wraps around to 6 in uint8 arithmetic, which is why ymax ends up smaller than ymin. The solution is to pass mat_dtype=True so that everything is loaded as np.float64. Not the most efficient approach, but it works.
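
A minimal sketch of the fix, assuming the read_drone_mat_file() function shown above (only the loadmat call changes):

# Before: the arrays come back as np.uint8, so x+w and y+h silently wrap around
# (e.g. 193 + 69 = 262 overflows to 6 in uint8 arithmetic)
data = sio.loadmat(frame)

# After: load with MATLAB dtypes, i.e. everything comes back as np.float64
data = sio.loadmat(frame, mat_dtype=True)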

Many thanks.
