目录
Log
试验环境:
GPU: TITAN Xp
CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
OS: Ubuntu 16.04
Anaconda: conda 4.6.11
Python: 3.5.6
Tensorflow: 1.10.0
数据准备
更新:
关于生成TF record,在网上找到了另一个方法:分别对训练数据集和测试数据集运行下列脚本,得到train.record和val.record
# xml2csv.py
"""Collect Pascal VOC / LabelImg XML annotations into a single CSV file.

Each ``<object>`` element of every ``*.xml`` file becomes one CSV row holding
the image file name, the image size, the class name and the bounding box.
"""
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# Directory holding the annotation files; the CSV is written here as well.
path = '/root/proj_emotor/data/VOCdevkit/VOC2012/Annotations'

COLUMN_NAMES = ['filename', 'width', 'height', 'class',
                'xmin', 'ymin', 'xmax', 'ymax']


def xml_to_csv(path):
    """Parse every ``*.xml`` file in *path* into a DataFrame.

    Elements are looked up by tag name rather than by position, so files
    whose children appear in a different order (for example ``bndbox``
    before ``name``, or ``xmax`` before ``xmin``) are still read correctly.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        A ``pandas.DataFrame`` with the columns in ``COLUMN_NAMES``, one row
        per annotated object. Empty (but with the right columns) when no
        annotation is found.
    """
    rows = []
    # Sorted so the row order is reproducible across runs and file systems.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        for member in root.findall('object'):
            box = member.find('bndbox')
            rows.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ))
    return pd.DataFrame(rows, columns=COLUMN_NAMES)


def main():
    """Convert the annotations under ``path`` and write ``emotor_train.csv``."""
    # The relative output file name below is resolved against this directory.
    os.chdir(path)
    xml_df = xml_to_csv(path)
    xml_df.to_csv('emotor_train.csv', index=None)
    print('Successfully converted xml to csv.')


if __name__ == '__main__':
    main()
# generate_tfrecord.py
# -*- coding: utf-8 -*-
"""Convert a CSV of bounding-box annotations into a TFRecord file.

Usage:
  # From tensorflow/models/
  # Create train data:
  python generate_tfrecord.py --csv_input=data/tv_vehicle_labels.csv --output_path=train.record

  # Create test data:
  python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

# Relative --csv_input / --output_path values are resolved against this directory.
os.chdir('/root/proj_emotor/data')

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS


# TO-DO replace this with label map
def class_text_to_int(row_label):
    """Map a class name from the CSV to its integer id in the label map.

    Raises:
        ValueError: If *row_label* is not a known class. Returning ``None``
            here would only fail later, inside the int64 feature builder,
            with a far less helpful message.
    """
    if row_label == 'emotor':  # change this to your own label(s)
        return 1
    raise ValueError('Unknown class label: {!r}'.format(row_label))


def split(df, group):
    """Group the annotation rows of *df* by the column *group*.

    Returns:
        A list of ``data(filename, object)`` namedtuples, one per distinct
        value of the column, where ``object`` is the sub-DataFrame of rows
        sharing that value.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(key, gb.get_group(key)) for key in gb.groups]


def create_tf_example(group, path):
    """Build one ``tf.train.Example`` for an image and all of its boxes.

    Args:
        group: A ``data`` namedtuple as produced by ``split``.
        path: Directory containing the image named ``group.filename``.

    Returns:
        The populated ``tf.train.Example``.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    # The size is read from the image itself, not from the CSV columns.
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for _, row in group.object.iterrows():
        # Box coordinates are stored normalized by the image size.
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(_):
    """Read ``--csv_input`` and write one Example per image to ``--output_path``."""
    # Absolute image directory; change this for your own data set.
    path = '/root/proj_emotor/data/VOCdevkit/VOC2012/JPEGImages'
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')

    # The context manager closes the record file even if an example fails.
    with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
原先
总共1700张未被标注的图片。随机选取800张做前期测试。
使用LabelImg进行手动标注,对象标签名设置为emotor。
标注的时候图片1-400所在文件夹名字为result
图片401-800所在文件夹名字为ImageSets 导致得到的.xml
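中有属性: <folder>result</folder> <filename>1.jpg</filename> <path>F:\result\1.jpg</path>
<folder>ImageSets</folder> <filename>405.jpg</filename> <path>/home/hzq0/VOC2012/ImageSets/405.jpg</path>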
其中的
<folder>ImageSets</folder>
在产生TFRecord的时候会有影响,后面会解释。
将所有得到的.xml
放在Annotations
文件夹中,将图片1-400放在result
文件夹中,图片401-800放在ImageSets
文件夹中。
执行脚本
"""Create the four ImageSets/Main split files from the annotation folder."""
import os
import random

# Fraction of all annotations that goes into trainval (the rest goes to test).
trainval_percent = 1
# Fraction of trainval that goes into train (the rest goes to val).
train_percent = 0.5
# Built with os.path.join so the script also runs outside Windows.
xmlfilepath = os.path.join('VOC2012', 'VOC2012', 'Annotations')
txtsavepath = os.path.join('VOC2012', 'VOC2012', 'ImageSets', 'Main')


def write_split_files(xml_dir, out_dir, trainval_ratio, train_ratio):
    """Randomly split the annotations in *xml_dir* and write the list files.

    Writes ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` into
    *out_dir* (created if missing). Every line has the form ``"<name> 1"``,
    where ``<name>`` is the annotation file name without its extension.

    Args:
        xml_dir: Directory containing the ``.xml`` annotation files; any
            other file in it is ignored.
        out_dir: Directory that receives the four ``.txt`` files.
        trainval_ratio: Fraction of all annotations assigned to trainval.
        train_ratio: Fraction of trainval assigned to train.
    """
    names = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_dir)
        if entry.lower().endswith('.xml')
    )
    total = len(names)
    trainval_count = int(total * trainval_ratio)
    train_count = int(trainval_count * train_ratio)

    trainval_ids = random.sample(range(total), trainval_count)
    # Sets give O(1) membership tests in the loop below.
    train_ids = set(random.sample(trainval_ids, train_count))
    trainval_ids = set(trainval_ids)

    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(out_dir, 'test.txt'), 'w') as ftest, \
            open(os.path.join(out_dir, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(out_dir, 'val.txt'), 'w') as fval:
        for index, stem in enumerate(names):
            line = stem + ' ' + '1' + '\n'
            if index not in trainval_ids:
                ftest.write(line)
                continue
            ftrainval.write(line)
            if index in train_ids:
                ftrain.write(line)
            else:
                fval.write(line)


if __name__ == '__main__':
    write_split_files(xmlfilepath, txtsavepath, trainval_percent, train_percent)
用来创建ImageSets/Main
中的四个.txt
,在别处执行该脚本需自行修改其中的文件路径。
在创建tfrecord时,发现四个.txt
只用到两个。怎么修改后面再说。
在准备好所需要的几个文件夹之后,拷贝脚本 create_pascal_tf_record.py,拷贝文件 pascal_label_map.pbtxt 到自己的工作目录。并且修改pascal_label_map.pbtxt
文件名为emotor_label_map.pbtxt
文件夹结构
我的工作目录叫做proj_emotor
,数据准备部分的工程结构为:
+ proj_emotor:
    + data:
        - create_pascal_tf_record.py
        - emotor_label_map.pbtxt
        + VOCdevkit:
            + ImageSets:
                + JPEGImages:
                    - 401~800.jpg
            + result:
                + JPEGImages:
                    - 1~400.jpg
            + VOC2012:
                + Annotations:
                    - .xml
                + ImageSets:
                    + Main:
                        - emotor_train.txt
                        - emotor_val.txt
                + JPEGImages:
                    - all pics
前面我们都没有改过配置文件和脚本的内容,下面需要对它们做一些修改。
首先,删除emotor_label_map.pbtxt
的内容,修改为:
item { id: 1 name: 'emotor'}
然后,修改create_pascal_tf_record.py
的第 165 行,把aeroplane_
改为 emotor_
仔细看目录树,原来的ImageSets/Main
里面的四个.txt
只保留了train.txt
和 val.txt
并且在前面都增加了emotor_
,这么改的原因在create_pascal_tf_record.py的第 165 行。
修改完成后,就可以执行脚本了。
在这之前再解释一下为什么在VOCdevkit
多了result
和ImageSets
。
因为在使用LabelImg
打标签的时候,前400张是在 result
文件夹里打的,后400张是在ImageSets
里打的(这里是个失误,本意是想把文件夹命名为VOC2012
的),前面说了得到的.xml
里有这么一句话
<folder>ImageSets</folder>
因为这个多余的信息,导致在创建TFRecord
的时候会先去对应的文件夹,再找JPEGImages
。所以以后标注数据时,应该统一将图片先放进 VOC2012
文件夹然后再打标签。
创建TFRecord
# From proj_emotor/data
# The label map is the renamed emotor_label_map.pbtxt, not the original pascal one.
python create_pascal_tf_record.py \
    --label_map_path=emotor_label_map.pbtxt \
    --data_dir=VOCdevkit --year=VOC2012 --set=train \
    --output_path=emotor_train.record
python create_pascal_tf_record.py \
    --label_map_path=emotor_label_map.pbtxt \
    --data_dir=VOCdevkit --year=VOC2012 --set=val \
    --output_path=emotor_val.record
注意执行命令的位置。
成功之后在 data 目录下得到两个文件:
emotor_train.record
emotor_val.record
进行训练
准备工作
首先在 TensorFlow Object Detection API 的 detection model zoo 下载官方的预训练模型。
我用的是 ssd_mobilenet_v2_coco
,在proj_emotor/models/model
下解压。
现在proj_emotor
下文件夹结构:
+ proj_emotor
    + data: ...
    + models:
        - model_main.py
        - pipeline.config
        + model:
            + train
            + eval
            + ssd_mobilenet_v2_coco_2018_03_29
其中的 model_main.py 复制自其原始位置。
pipeline.config
拷贝自解压后得到的ssd_mobilenet_v2_coco_2018_03_29/pipeline.config
。
修改其中的内容,修改后内容为:
model { ssd { num_classes: 1 image_resizer { fixed_shape_resizer { height: 300 width: 300 } } feature_extractor { type: "ssd_mobilenet_v2" depth_multiplier: 1.0 min_depth: 16 conv_hyperparams { regularizer { l2_regularizer { weight: 3.99999989895e-05 } } initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.0299999993294 } } activation: RELU_6 batch_norm { decay: 0.999700009823 center: true scale: true epsilon: 0.0010000000475 train: true } } batch_norm_trainable: true use_depthwise: true } box_coder { faster_rcnn_box_coder { y_scale: 10.0 x_scale: 10.0 height_scale: 5.0 width_scale: 5.0 } } matcher { argmax_matcher { matched_threshold: 0.5 unmatched_threshold: 0.5 ignore_thresholds: false negatives_lower_than_unmatched: true force_match_for_each_row: true } } similarity_calculator { iou_similarity { } } box_predictor { convolutional_box_predictor { conv_hyperparams { regularizer { l2_regularizer { weight: 3.99999989895e-05 } } initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.0299999993294 } } activation: RELU_6 batch_norm { decay: 0.999700009823 center: true scale: true epsilon: 0.0010000000475 train: true } } min_depth: 0 max_depth: 0 num_layers_before_predictor: 0 use_dropout: false dropout_keep_probability: 0.800000011921 kernel_size: 3 box_code_size: 4 apply_sigmoid_to_scores: false } } anchor_generator { ssd_anchor_generator { num_layers: 6 min_scale: 0.20000000298 max_scale: 0.949999988079 aspect_ratios: 1.0 aspect_ratios: 2.0 aspect_ratios: 0.5 aspect_ratios: 3.0 aspect_ratios: 0.333299994469 } } post_processing { batch_non_max_suppression { score_threshold: 0.300000011921 iou_threshold: 0.600000023842 max_detections_per_class: 100 max_total_detections: 100 } score_converter: SIGMOID } normalize_loss_by_num_matches: true loss { localization_loss { weighted_smooth_l1 { } } classification_loss { weighted_sigmoid { } } hard_example_miner { num_hard_examples: 3000 iou_threshold: 0.990000009537 loss_type: CLASSIFICATION 
max_negatives_per_positive: 3 min_negatives_per_image: 3 } classification_weight: 1.0 localization_weight: 1.0 } }}train_config { batch_size: 24 data_augmentation_options { random_horizontal_flip { } } data_augmentation_options { ssd_random_crop { } } optimizer { rms_prop_optimizer { learning_rate { exponential_decay_learning_rate { initial_learning_rate: 0.00400000018999 decay_steps: 800720 decay_factor: 0.949999988079 } } momentum_optimizer_value: 0.899999976158 decay: 0.899999976158 epsilon: 1.0 } } fine_tune_checkpoint: "/root/proj_emotor/model/ssd_mobilenet_v2_coco_2018_03_29/model.ckpt" num_steps: 200000 fine_tune_checkpoint_type: "detection"}train_input_reader { label_map_path: "/root/proj_emotor/data/emotor_label_map.pbtxt" tf_record_input_reader { input_path: "/root/proj_emotor/data/emotor_train.record" }}eval_config { num_examples: 8000 max_evals: 10 use_moving_averages: false}eval_input_reader { label_map_path: "/root/proj_emotor/data/emotor_label_map.pbtxt" shuffle: false num_readers: 1 tf_record_input_reader { input_path: "/root/proj_emotor/data/emotor_val.record" }}
开始训练
# From the proj_emotor/models directory
python model_main.py \
    --pipeline_config_path=pipeline.config \
    --model_dir=model \
    --num_train_steps=80000 \
    --sample_1_of_n_eval_examples=1 \
    --alsologtostderr
训练期间可以使用TensorBoard实时查看训练的进度。
tensorboard --logdir={PATH TO LOG}
这里log的位置就是训练过程中日志输出的位置
训练结束之后,model文件夹中得到如下内容:
model
├── checkpoint
├── eval
│   └── events.out.tfevents.1563524306.hzq
├── events.out.tfevents.1563523648.hzq
├── export
│   └── Servo_0
│       └── 1563551359
│           ├── saved_model.pb
│           └── variables
│               ├── variables.data-00000-of-00001
│               └── variables.index
├── graph.pbtxt
├── model.ckpt-74690.data-00000-of-00001
├── model.ckpt-74690.index
├── model.ckpt-74690.meta
├── model.ckpt-76420.data-00000-of-00001
├── model.ckpt-76420.index
├── model.ckpt-76420.meta
├── model.ckpt-78144.data-00000-of-00001
├── model.ckpt-78144.index
├── model.ckpt-78144.meta
├── model.ckpt-79957.data-00000-of-00001
├── model.ckpt-79957.index
├── model.ckpt-79957.meta
├── model.ckpt-80000.data-00000-of-00001
├── model.ckpt-80000.index
├── model.ckpt-80000.meta
├── pipeline.config
└── train
得到用于推理的模型
# Export the step-80000 checkpoint as a frozen inference graph.
python export_inference_graph.py \
    --input_type=image_tensor \
    --pipeline_config_path=./model/pipeline.config \
    --trained_checkpoint_prefix=/root/proj_emotor/models/model/model.ckpt-80000 \
    --output_directory=./model/train/
其中的--pipeline_config_path
是训练得到的pipeline.config
,不是用于训练的pipeline.config
train
├── checkpoint
├── frozen_inference_graph.pb
├── model.ckpt.data-00000-of-00001
├── model.ckpt.index
├── model.ckpt.meta
├── pipeline.config
└── saved_model
    ├── saved_model.pb
    └── variables
train文件夹中得到了用于推理的pb文件 frozen_inference_graph.pb。
推理
用于推理的代码:
"""Run the exported frozen detection graph on a folder of test images."""
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')

# In a Jupyter notebook, enable inline display of the images with:
#%matplotlib inline

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Directory that holds the exported model (nothing is downloaded).
MODEL_NAME = '/root/proj_emotor/models/model/train/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + 'frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('/root/proj_emotor/data', 'emotor_label_map.pbtxt')

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)


def load_image_into_numpy_array(image):
    """Return *image* (a PIL image) as a writable (height, width, 3) uint8 array.

    The image is converted to RGB first, so grayscale, palette and RGBA
    inputs also yield three channels instead of failing on the reshape.
    """
    return np.array(image.convert('RGB'), dtype=np.uint8)


# Images 1.jpg ... 99.jpg inside TEST_IMGs are processed.
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'TEST_IMGs'
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, '{}.jpg'.format(i))
                    for i in range(1, 100)]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)


def _detect_with_session(image, sess):
    """Run one detection on *image* using the already opened session *sess*.

    Must be called while the detection graph is the default graph.
    """
    # Get handles to input and output tensors
    default_graph = tf.get_default_graph()
    all_tensor_names = {output.name
                        for op in default_graph.get_operations()
                        for output in op.outputs}
    tensor_dict = {}
    for key in ['num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks']:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = default_graph.get_tensor_by_name(tensor_name)

    if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to
        # image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)

    image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

    # Run inference
    output_dict = sess.run(tensor_dict,
                           feed_dict={image_tensor: np.expand_dims(image, 0)})

    # all outputs are float32 numpy arrays, so convert types as appropriate
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict


def run_inference_for_single_image(image, graph, sess=None):
    """Detect objects in one image.

    Args:
        image: Image as a numpy array; a batch dimension is added before it
            is fed to the ``image_tensor:0`` input.
        graph: The ``tf.Graph`` holding the imported detection model.
        sess: Optional open ``tf.Session`` on *graph*. Passing one avoids
            creating a new session for every image; when omitted, a
            temporary session is opened for this single call.

    Returns:
        A dict with ``num_detections``, ``detection_boxes``,
        ``detection_classes``, ``detection_scores`` and, when the model
        provides it, ``detection_masks``, each with the batch dimension
        removed.
    """
    with graph.as_default():
        if sess is not None:
            return _detect_with_session(image, sess)
        with tf.Session() as temporary_sess:
            return _detect_with_session(image, temporary_sess)


# One session is shared by all test images instead of one per image.
with tf.Session(graph=detection_graph) as detection_sess:
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # Actual detection.
        output_dict = run_inference_for_single_image(
            image_np, detection_graph, sess=detection_sess)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()
问题
1
google.protobuf.text_format.ParseError: 35:7 : Message type "object_detection.protos.SsdFeatureExtractor" has no field named "batch_norm_trainable"
删除pipeline.config中的
batch_norm_trainable: true
之后,得到了解决。但是不知道影响在哪,之前没遇到过。
解释
前面几次试验时的pipeline.config
是拷贝自 ,里面没有这个配置选项。具体这个选项什么作用,还不知道。
2
训练开始之后,有些参数没能使用checkpoints进行初始化
Use `tf.data.Dataset.batch(..., drop_remainder=True)`.W0718 14:56:12.331554 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/beta] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.W0718 14:56:12.331802 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/gamma] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.W0718 14:56:12.331923 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/moving_mean] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.W0718 14:56:12.332051 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/moving_variance] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.W0718 14:56:12.332157 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/weights] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1, 1, 320, 1280]], model variable shape: [[1, 1, 320, 256]]. This variable will not be initialized from the checkpoint.W0718 14:56:12.336651 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/layer_19_1_Conv2d_2_1x1_256/weights] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1, 1, 1280, 256]], model variable shape: [[1, 1, 256, 256]]. 
This variable will not be initialized from the checkpoint.