cyydjt 2020-05-11
The dlib training data consists of a test folder and a train folder, each holding a set of images plus one xml file; the xml file stores the annotation information for those images.
dlibData:
+---test
|       1.jpg
|       10.jpg
|       11.jpg
|       ...
|       56.jpg
|       57.jpg
|       test.xml
|
\---train
        1.jpg
        ...
        95.jpg
        96.jpg
        97.jpg
        98.jpg
        99.jpg
        train.xml
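For reference, a dlib (imglab) annotation file looks roughly like the sketch below; the file names and box values here are made up. xml2txt.py further down relies on this exact layout: each <image> and <box> tag on its own line, attribute values in single quotes, in the order top, left, width, height.

<?xml version='1.0' encoding='ISO-8859-1'?>
<dataset>
    <images>
        <image file='1.jpg'>
            <box top='78' left='114' width='60' height='40'/>
        </image>
        <image file='2.jpg'>
            <box top='95' left='150' width='55' height='38'/>
        </image>
    </images>
</dataset>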
The YOLO training data is structured as follows:
YOLOData:.
|   classes.names   # class names
|   test.txt        # paths of the validation images
|   train.txt       # paths of the training images
|   SplitData.py    # script that splits the images matching the label files under labels/ into train.txt and test.txt (some images in the xml carry no annotation, but every file under labels/ does)
|   xml2txt.py      # copies the images under train/ (test/) into JPEGImages/ under random names, and converts train.xml (test.xml) into the label files under labels/, one label file per image
|   YOLOData.data   # stores the training and validation paths, etc.
|
+---JPEGImages
|       0eQ2ARay.jpg
|       0HzMbDSE.jpg
|       0K7SYueV.jpg
|       ...
|       0TIf1aij.jpg
|       10QmnWfi.jpg
|       1bVJ5Zkl.jpg
|
+---labels
|       0eQ2ARay.txt
|       0HzMbDSE.txt
|       0K7SYueV.txt
|       ...
|       Zi2Ec8Tt.txt
|       znvQ045k.txt
|       zOvPyFtR.txt
|       ZViHLeBs.txt
|
+---TestYOLOData    # test the training result
|       darknet-yolov3.cfg        # yolo configuration file
|       img2video.py              # tool that combines images into a video
|       object_detection_yolo.py  # runs object detection on a video
|       test.avi                  # video built from the images
|       test_yolo_out_py.avi      # detection output video
|
+---weights
|       darknet-yolov3_final.weights  # weights produced by training
|
+---test            # validation set in dlib format
|       1.jpg
|       10.jpg
|       11.jpg
|       ...
|       56.jpg
|       57.jpg
|       test.xml
|
\---train           # training set in dlib format
        1.jpg
        ...
        95.jpg
        96.jpg
        97.jpg
        98.jpg
        99.jpg
        train.xml
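Each file under labels/ holds one line per box in the Darknet label format: the class index followed by the box center and size, all normalized by the image width and height. For example (values made up; with a single class the index is always 0):

0 0.5286 0.4915 0.0938 0.0833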
xml2txt.py
'''
dlib .xml file to yolo .txt file
python xml2txt.py dlib_train_path dlib_test_path
example:
python xml2txt.py /home/hichens/YOLOData/train/ /home/hichens/YOLOData/test/
'''
import cv2
import os
import subprocess
import sys
import random
import string

train_path = sys.argv[1]
test_path = sys.argv[2]
file_path = "/".join(train_path.split("/")[:-2])  # the YOLOData root directory

# recreate empty JPEGImages/ and labels/ directories
subprocess.run(['rm', '-rf', file_path + "/JPEGImages/"])
subprocess.run(['mkdir', file_path + "/JPEGImages"])
subprocess.run(['rm', '-rf', file_path + "/labels/"])
subprocess.run(['mkdir', file_path + "/labels"])


def xml2txt(xml_path):
    base_path = "/".join(xml_path.split("/")[:-2])
    I_path = "/".join(xml_path.split("/")[:-1])
    with open(xml_path, 'r') as f:
        for line in f:
            ss = line.split()
            if len(ss) < 1:
                continue
            if ss[0] == "<image":
                img_name = line.split("'")[1]
                print(img_name)
            if ss[0] == "<box":
                ll = line.split("'")
                top, left, width, height = int(ll[1]), int(ll[3]), int(ll[5]), int(ll[7])
                img_path = I_path + '/' + img_name  # image in the train or test folder
                move_path = base_path + "/JPEGImages/" + img_name
                # copy the image to JPEGImages; note that an image with several
                # boxes is copied once per box, each copy with its own label file
                subprocess.run(['cp', img_path, move_path])
                # rename the image in JPEGImages with a random 8-character name
                add_label = ''.join(random.sample(string.ascii_letters + string.digits, 8))
                new_name = base_path + "/JPEGImages/" + add_label + '.jpg'
                os.rename(move_path, new_name)
                # convert the dlib box (top, left, width, height in pixels) to
                # the YOLO format (x_center, y_center, w, h normalized to [0, 1])
                img = cv2.imread(img_path)
                H, W = img.shape[:2]
                x_center, y_center = (left + width / 2) / W, (top + height / 2) / H
                w, h = width / W, height / H
                print(x_center, y_center, w, h)
                # name the txt after the image name in JPEGImages
                file_name = base_path + "/labels/" + add_label + ".txt"
                with open(file_name, 'w') as file:
                    sentence = " ".join(str(i) for i in [0, x_center, y_center, w, h])
                    file.write(sentence)


if __name__ == "__main__":
    xml2txt(train_path + "train.xml")
    xml2txt(test_path + "test.xml")
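The line-by-line string splitting above depends on imglab's exact output layout (one tag per line, single-quoted attributes). A more robust variant is to parse the xml properly; below is a minimal sketch of the same conversion using the standard library's xml.etree.ElementTree, under the same one-class assumption (class id 0). Unlike the script above, this version keeps the original file names and writes all of an image's boxes into a single label file.

import os
import xml.etree.ElementTree as ET
import cv2

def xml2txt_et(xml_path, img_dir, label_dir):
    # iterate over every <image> element in the dlib dataset file
    root = ET.parse(xml_path).getroot()
    for image in root.iter('image'):
        img_name = image.get('file')
        img = cv2.imread(os.path.join(img_dir, img_name))
        H, W = img.shape[:2]
        lines = []
        for box in image.iter('box'):
            top, left = int(box.get('top')), int(box.get('left'))
            width, height = int(box.get('width')), int(box.get('height'))
            # normalize to the YOLO (class x_center y_center w h) format
            x_c, y_c = (left + width / 2) / W, (top + height / 2) / H
            lines.append("0 %f %f %f %f" % (x_c, y_c, width / W, height / H))
        label_name = os.path.splitext(img_name)[0] + ".txt"
        with open(os.path.join(label_dir, label_name), 'w') as f:
            f.write("\n".join(lines))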
SplitData.py
'''
from labels to split the data into train data and validation data
python SplitData.py /home/hichens/YOLOData/
'''
import random
import os
import sys


def split_data_set(base_path):
    label_dir = base_path + 'labels'
    image_dir = base_path + 'JPEGImages'
    f_val = open("test.txt", 'w')
    f_train = open("train.txt", 'w')

    path, dirs, files = next(os.walk(label_dir))
    data_size = len(files)
    # hold out a random 10% of the data for validation
    data_test_size = int(0.1 * data_size)
    test_array = random.sample(range(data_size), k=data_test_size)

    ind = 0
    for f in os.listdir(label_dir):
        if f.split(".")[1] == "txt":
            file_name = f.split(".")[0] + '.jpg'
            if ind in test_array:
                f_val.write(image_dir + '/' + file_name + '\n')
            else:
                f_train.write(image_dir + '/' + file_name + '\n')
            ind += 1

    f_val.close()
    f_train.close()


if __name__ == "__main__":
    split_data_set(sys.argv[1])
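For completeness, YOLOData.data is the usual Darknet .data file tying train.txt, test.txt and classes.names together. A minimal sketch (using weights/ as the backup directory is an assumption; this post does not show the actual file):

classes = 1
train   = /home/hichens/YOLOData/train.txt
valid   = /home/hichens/YOLOData/test.txt
names   = /home/hichens/YOLOData/classes.names
backup  = /home/hichens/YOLOData/weights/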
img2video.py
'''
combine the images to video
python img2video.py image_path
example:
python img2video.py /home/hichens/YOLOData/test/
'''
# encoding: UTF-8
import glob as gb
import cv2
import sys

in_path = sys.argv[1]
img_paths = gb.glob(in_path + "*.jpg")  # only pick up images, skip the xml file
fps = 4  # the bigger the value, the faster the video plays
size = (640, 480)  # the output frame size
videoWriter = cv2.VideoWriter('test.avi', cv2.VideoWriter_fourcc('I', '4', '2', '0'), fps, size)

# sample roughly 30 frames and show a simple text progress bar
step = max(1, len(img_paths) // 30)
print("[", end="")
for i, path in enumerate(img_paths):
    if i % step == 0:
        img = cv2.imread(path)
        img = cv2.resize(img, size)
        print(">", end="", flush=True)
        videoWriter.write(img)
print("]")
videoWriter.release()
print("OK!")
object_detection_yolo.py
'''
test the training result
example:
python object_detection_yolo.py --video=test.avi
python object_detection_yolo.py --image=bird.jpg
'''
import cv2 as cv
import argparse
import sys
import numpy as np
import os.path

# Initialize the parameters
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4   # Non-maximum suppression threshold
inpWidth = 416       # 608 # Width of network's input image
inpHeight = 416      # 608 # Height of network's input image

parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
args = parser.parse_args()

# Load names of classes
classesFile = "classes.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Give the configuration and weight files for the model and load the network using them.
modelConfiguration = "darknet-yolov3.cfg"
modelWeights = "../weights/darknet-yolov3_800.weights"

net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)


# Get the names of the output layers
def getOutputsNames(net):
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]


# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    # cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
    cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 3)

    label = '%.2f' % conf

    # Get the label for the class name and its confidence
    if classes:
        assert (classId < len(classes))
        label = '%s:%s' % (classes[classId], label)

    # Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])),
                 (left + round(1.5 * labelSize[0]), top + baseLine), (0, 0, 255), cv.FILLED)
    # cv.rectangle(frame, (left, top - round(1.5*labelSize[1])),
    #              (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 2)


# Remove the bounding boxes with low confidence using non-maxima suppression
def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        print("out.shape : ", out.shape)
        for detection in out:
            # if detection[4] > 0.001:
            scores = detection[5:]
            classId = np.argmax(scores)
            # if scores[classId] > confThreshold:
            confidence = scores[classId]
            if detection[4] > confThreshold:
                print(detection[4], " - ", scores[classId], " - th : ", confThreshold)
                print(detection)
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)


# Process inputs
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

outputFile = "yolo_out_py.avi"
if args.image:
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    outputFile = args.image[:-4] + '_yolo_out_py.jpg'
elif args.video:
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = args.video[:-4] + '_yolo_out_py.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)

# Get the video writer initialized to save the output video
if not args.image:
    vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 4,
                                (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
                                 round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

while cv.waitKey(1) < 0:
    # get frame from the video
    hasFrame, frame = cap.read()

    # Stop the program if reached end of video
    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)

    # Put efficiency information. The function getPerfProfile returns the overall time for
    # inference(t) and the timings for each of the layers(in layersTimes)
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    # cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    # Write the frame with the detection boxes
    if args.image:
        cv.imwrite(outputFile, frame.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))

    cv.imshow(winName, frame)
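The weights file loaded above comes from training with Darknet, which this post does not show. Assuming a standard Darknet build and a downloaded darknet53.conv.74 pretrained backbone (both assumptions), the training command would look like:

./darknet detector train YOLOData.data darknet-yolov3.cfg darknet53.conv.74

Darknet periodically writes checkpoints such as darknet-yolov3_800.weights and a final darknet-yolov3_final.weights into the backup directory given in YOLOData.data, which matches the file names used in weights/ and in object_detection_yolo.py above.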