这一特殊领域被称为计算机视觉 (Computer Vision, CV),在现代生活中有着广泛的应用。
目标检测 (Object Detection) 也是计算机视觉最酷的应用之一,这是不容置疑的事实。
1. 设置要求:
2. 设置环境
git clone https://github.com/tensorflow/models.git
若有遗漏的组件,在运行环境中执行pip install即可。
谷歌的Protobuf,又称Protocol buffers,是一种语言无关、平台无关、可扩展的序列化结构数据的机制。Protobuf帮助程序员定义数据结构,轻松地在各种数据流中使用各种语言进行编写和读取结构数据。
cd models/research
wget -O protobuf.zip https://github.com/protocolbuffers/protobuf/releases/download/v3.9.1/protoc-3.9.1-osx-x86_64.zip
unzip protobuf.zip
在research/目录中执行如下命令,使用Protobuf编译器编译.proto文件:
./bin/protoc object_detection/protos/*.proto --python_out=.
这个目录下有一个叫object_detection_tutorial.ipynb的ipython notebook。该文件是演示目标检测算法的demo,在执行时会用到指定的模型:
要检测直播视频中的目标还需要一些微调。在同一文件夹中新建一个Jupyter notebook,按照下面的代码操作:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from utils import ops as utils_ops

# Fail early on TensorFlow releases older than 1.12.0.
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')
# This is needed to display the images inline in the notebook.
get_ipython().run_line_magic('matplotlib', 'inline')
# Object detection imports
# Here are the imports from the object detection module.
from utils import label_map_util
from utils import visualization_utils as vis_util
# Model preparation
# Any model exported using the `export_inference_graph.py` tool can be loaded
# here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.
# By default we use an "SSD with Mobilenet" model here.
# See https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
# for a list of other models that can be run out-of-the-box with varying
# speeds and accuracies.

# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for
# the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
# Download Model
# Fetch the model archive into the current working directory.
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# Extract only the frozen inference graph; the `with` block closes the
# archive once extraction is done.
with tarfile.open(MODEL_FILE) as tar_file:
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(file, os.getcwd())
# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # The frozen graph is a serialized protobuf, so read it as bytes.
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # An empty name keeps the imported tensor names unprefixed, which
        # the later lookups such as 'image_tensor:0' rely on.
        tf.import_graph_def(od_graph_def, name='')
# Loading label map
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `airplane`. Here we
# use internal utility functions, but anything that returns a dictionary
# mapping integers to appropriate string labels would be fine.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)
def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its detections.

    Args:
        image: Array fed directly to the graph's ``image_tensor:0`` input.
            ``image.shape[1]`` and ``image.shape[2]`` are passed as the mask
            target size, so a leading batch axis is assumed (presumably
            shape ``(1, height, width, channels)`` -- confirm with caller).
        graph: Graph that holds the imported detection model.

    Returns:
        A dict holding whichever of ``num_detections``, ``detection_boxes``,
        ``detection_scores``, ``detection_classes`` and ``detection_masks``
        exist in the graph, each reduced to its first (only) batch entry.
    """
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks',
            ]:
                tensor_name = key + ':0'
                # Only fetch outputs the loaded model actually provides.
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = (
                        tf.get_default_graph().get_tensor_by_name(tensor_name))

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image.
                detection_boxes = tf.squeeze(
                    tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(
                    tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates
                # to image coordinates and fit the image size.
                real_num_detection = tf.cast(
                    tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(
                    detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(
                    detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = (
                    utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes,
                        image.shape[1], image.shape[2]))
                # Binarize the masks at a 0.5 threshold.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension.
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)

            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            # Run inference.
            output_dict = sess.run(
                tensor_dict, feed_dict={image_tensor: image})

            # All outputs are float32 numpy arrays, so convert types as
            # appropriate and drop the batch dimension.
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = (
                output_dict['detection_classes'][0].astype(np.int64))
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = (
                output_dict['detection_scores'][0])
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = (
                    output_dict['detection_masks'][0])
    return output_dict