OpenCV_12(딥러닝2)

SSD 얼굴 검출

import sys
import numpy as np
import cv2


# SSD face detector: Caffe weights + network description.
model = 'opencv_face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel'
config = 'opencv_face_detector/deploy.prototxt'
# Alternative TensorFlow model files:
# model = 'opencv_face_detector/opencv_face_detector_uint8.pb'
# config = 'opencv_face_detector/opencv_face_detector.pbtxt'

# cap = cv2.VideoCapture('utub.mp4')
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print('Camera open failed!')
    sys.exit()

# Everything after the capture is opened runs inside try/finally so the
# capture device is released on every exit path (normal end, sys.exit, error).
try:
    net = cv2.dnn.readNet(model, config)

    if net.empty():
        print('Net open failed!')
        sys.exit()

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # scale factor 1 keeps pixel values in 0..255,
        # the frame is resized to 300x300,
        # and the per-channel mean (104, 177, 123) is subtracted.
        blob = cv2.dnn.blobFromImage(frame, 1, (300, 300), (104, 177, 123))
        net.setInput(blob)
        out = net.forward()  # 4-D output; only the last two axes (rows x 7) are needed

        detect = out[0, 0, :, :]  # 2-D matrix: one detection per row
        (h, w) = frame.shape[:2]

        for i in range(detect.shape[0]):
            confidence = detect[i, 2]  # probability that this row is a face
            if confidence < 0.5:
                # Skip instead of stopping, so the loop does not depend on
                # the rows being sorted by confidence.
                continue

            # Coordinates are normalised to 0..1; scale by the frame size.
            # (x1, y1): top-left corner
            x1 = int(detect[i, 3] * w)
            y1 = int(detect[i, 4] * h)

            # (x2, y2): bottom-right corner
            x2 = int(detect[i, 5] * w)
            y2 = int(detect[i, 6] * h)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0))

            label = f'Face: {confidence:4.2f}'
            # Text is anchored just above the top-left corner of the box.
            cv2.putText(frame, label, (x1, y1-1), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 1, cv2.LINE_AA)

        cv2.imshow('frame', frame)

        # ESC (key code 27) quits.
        if cv2.waitKey(1) == 27:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

YOLOv3 객체 검출

import sys
import numpy as np
import cv2


# Model & configuration files
model = 'yolo_v3/yolov3.weights'
config = 'yolo_v3/yolov3.cfg'
class_labels = 'yolo_v3/coco.names'
confThreshold = 0.5
nmsThreshold = 0.4

# Test image files
img_files = ['person.jpg', 'sheep.jpg', 'kite.jpg']

# Create the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

# Load the class names (one name per line)
classes = []
with open(class_labels, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# One random colour per class: values drawn from [0, 255), shape (len(classes), 3).
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Names of the output layers.
# getUnconnectedOutLayers() returns 1-based layer indices; depending on the
# OpenCV version the result is shaped (N, 1) or (N,), so flatten it first.
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1]
                 for i in np.asarray(net.getUnconnectedOutLayers()).reshape(-1)]
# e.g. output_layers = ['yolo_82', 'yolo_94', 'yolo_106']
#
# For a 416x416 input the three outputs have these shapes:
#   layer 82:  (507, 85)   13*13 grid cells * 3 boxes per cell
#   layer 94:  (2028, 85)  26*26 * 3
#   layer 106: (8112, 85)  52*52 * 3
# The 85 columns are x, y, w, h, objectness, then one score per class (80).

# Run on every test image

for f in img_files:
    img = cv2.imread(f)

    if img is None:
        continue

    # Build the blob & run inference
    blob = cv2.dnn.blobFromImage(img, 1/255., (416, 416), swapRB=True)
    net.setInput(blob)

    # forward(list of names) returns one output array per requested layer.
    outs = net.forward(output_layers)

    h, w = img.shape[:2]

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # detection: 4 (bounding box) + 1 (objectness) + class scores
            scores = detection[5:]
            class_id = np.argmax(scores)  # index of the highest class score
            confidence = scores[class_id]

            # Only keep boxes whose best class score exceeds the threshold.
            if confidence > confThreshold:
                # Box centre and size, scaled from normalised units to pixels.
                cx = int(detection[0] * w)
                cy = int(detection[1] * h)
                bw = int(detection[2] * w)
                bh = int(detection[3] * h)

                # Top-left corner of the box
                sx = int(cx - bw / 2)
                sy = int(cy - bh / 2)

                # Every candidate box is collected here; NMSBoxes below picks
                # the representative ones.
                boxes.append([sx, sy, bw, bh])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))

    # Non-maximum suppression: among boxes that overlap by more than
    # nmsThreshold, keep only the one with the highest confidence
    # (boxes below confThreshold are discarded).
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)

    # The result is shaped (N, 1), (N,) or an empty tuple depending on the
    # OpenCV version and on whether anything was kept; flatten handles all.
    for i in np.asarray(indices).reshape(-1):
        i = int(i)
        sx, sy, bw, bh = boxes[i]
        label = f'{classes[class_ids[i]]}: {confidences[i]:.2f}'
        color = colors[class_ids[i]]
        cv2.rectangle(img, (sx, sy, bw, bh), color, 2)
        cv2.putText(img, label, (sx, sy - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

    # getPerfProfile() reports the inference time in ticks.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 1, cv2.LINE_AA)

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()

Mask R-CNN

import sys
import numpy as np
import cv2


def drawBox(img, classId, conf, left, top, right, bottom):
    """Draw a detection box and a filled class/confidence caption on img.

    The box colour and the class name are looked up by classId in the
    module-level ``colors`` and ``classes`` tables. ``img`` is modified
    in place; nothing is returned.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    box_color = colors[classId]

    # Outline of the detection.
    cv2.rectangle(img, (left, top), (right, bottom), box_color, 2)

    caption = f'{classes[classId]}: {conf:.2f}'
    (text_w, text_h), baseline = cv2.getTextSize(caption, font, 0.6, 1)

    # Push the caption down when the box starts higher than the text height.
    caption_y = max(top, text_h)

    # Filled background strip, then the caption text in black on top of it.
    strip_top_left = (left - 1, caption_y - text_h - baseline)
    strip_bottom_right = (left + text_w, caption_y)
    cv2.rectangle(img, strip_top_left, strip_bottom_right, box_color, -1)
    cv2.putText(img, caption, (left, caption_y - baseline), font,
                0.6, (0, 0, 0), 1, cv2.LINE_AA)


# Model & configuration files
model = 'mask_rcnn/frozen_inference_graph.pb'
config = 'mask_rcnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt'
class_labels = 'mask_rcnn/coco_90.names'
confThreshold = 0.6
maskThreshold = 0.3

# Test image files
img_files = ['dog.jpg', 'traffic.jpg', 'sheep.jpg']

# Create the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

# Load the class names (one name per line)
classes = []
with open(class_labels, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# One random colour per class.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Layer names.
# The two outputs requested further down are:
#   'detection_out_final': (1, 1, N, 7)  one row per detected object
#   'detection_masks':     (N, C, 15, 15) a 15x15 mask per object per class
layer_names = net.getLayerNames()  # names of every layer in the network
# getUnconnectedOutLayers() returns 1-based layer indices shaped (N, 1) or
# (N,) depending on the OpenCV version, so flatten before indexing.
# NOTE: output_layers is informational only; forward() below uses explicit names.
output_layers = [layer_names[i - 1]
                 for i in np.asarray(net.getUnconnectedOutLayers()).reshape(-1)]
for name in layer_names:
    print(name)

# Run on every test image

for f in img_files:
    img = cv2.imread(f)

    if img is None:
        continue

    # Build the blob & run inference.
    # No size is given, so the blob keeps the image size; the mean defaults
    # to 0 so it is not specified. swapRB=True converts BGR to RGB.
    blob = cv2.dnn.blobFromImage(img, swapRB=True)
    net.setInput(blob)

    # detection_out_final: bounding-box information
    # detection_masks: mask information for each bounding box
    boxes, masks = net.forward(['detection_out_final', 'detection_masks'])

    h, w = img.shape[:2]
    numClasses = masks.shape[1]     # number of classes in the mask output
    numDetections = boxes.shape[2]  # number of detections returned for this image

    boxesToDraw = []
    for i in range(numDetections):
        # box: (_, classId, confidence, x1, y1, x2, y2) with coordinates
        # normalised to 0..1; (x1, y1) top-left, (x2, y2) bottom-right.
        box = boxes[0, 0, i]

        # All per-class masks for detection i; only the one belonging to
        # the detected class is used.
        detectionMasks = masks[i]
        score = box[2]  # confidence
        if score > confThreshold:
            classId = int(box[1])

            # Scale to pixel coordinates.
            x1 = int(w * box[3])
            y1 = int(h * box[4])
            x2 = int(w * box[5])
            y2 = int(h * box[6])

            # Clamp the box to the image bounds.
            x1 = max(0, min(x1, w - 1))
            y1 = max(0, min(y1, h - 1))
            x2 = max(0, min(x2, w - 1))
            y2 = max(0, min(y2, h - 1))

            # A degenerate box would give cv2.resize a non-positive size.
            if x2 < x1 or y2 < y1:
                continue

            boxesToDraw.append([img, classId, score, x1, y1, x2, y2])

            # Float mask for the detected class: small values are
            # background, large values are the object.
            classMask = detectionMasks[classId]

            # Resize the low-resolution mask to the bounding-box size.
            classMask = cv2.resize(classMask, (x2 - x1 + 1, y2 - y1 + 1))

            # Pixels above maskThreshold count as object, the rest as background.
            objectPixels = (classMask > maskThreshold)

            # Blend the class colour over the object pixels (70% colour, 30% image).
            roi = img[y1:y2+1, x1:x2+1][objectPixels]
            img[y1:y2+1, x1:x2+1][objectPixels] = (0.7 * colors[classId] + 0.3 * roi).astype(np.uint8)

    # Draw the boxes and class labels after all masks, so they stay on top.
    for box in boxesToDraw:
        drawBox(*box)

    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 1, cv2.LINE_AA)

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()

OpenPose

import sys
import numpy as np
import cv2


# Model & configuration files
model = 'openpose/pose_iter_440000.caffemodel'
config = 'openpose/pose_deploy_linevec.prototxt'

# Number of pose points, number of connections, and the connected point pairs
nparts = 18  # total number of key points
npairs = 17  # number of line segments drawn between points

# Which key points are joined to form the skeleton
pose_pairs = [
    (1, 2), (2, 3), (3, 4),        # left arm
    (1, 5), (5, 6), (6, 7),        # right arm
    (1, 8), (8, 9), (9, 10),       # left leg
    (1, 11), (11, 12), (12, 13),   # right leg
    (1, 0), (0, 14), (14, 16), (0, 15), (15, 17),  # face
]

# Test image files
img_files = ['pose1.jpg', 'pose2.jpg', 'pose3.jpg']

# Create the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

for f in img_files:
    img = cv2.imread(f)

    if img is None:
        continue

    # Build the blob & run inference
    net.setInput(cv2.dnn.blobFromImage(img, 1/255., (368, 368)))
    out = net.forward()  # 4-D output; only the first nparts channels are used

    h, w = img.shape[:2]
    map_h, map_w = out.shape[2], out.shape[3]

    # Extract one point per body part from its heat map.
    # (To visualise a heat map: normalise it to 0..255 as 8-bit, resize to
    # (w, h), convert to BGR and blend it with img.)
    points = []
    for part in range(nparts):
        # Location and value of the heat-map maximum.
        _, conf, _, (px, py) = cv2.minMaxLoc(out[0, part, :, :])

        if conf > 0.1:  # heat map threshold = 0.1
            # Rescale from heat-map coordinates to image coordinates.
            points.append((int(w * px / map_w), int(h * py / map_h)))
        else:
            points.append(None)

    # Draw the result
    for first, second in pose_pairs:
        p1, p2 = points[first], points[second]

        # Skip connections with an undetected end point.
        if p1 is None or p2 is None:
            continue

        cv2.line(img, p1, p2, (0, 255, 0), 3, cv2.LINE_AA)

        # Mark both end points with a filled circle.
        for end_point in (p1, p2):
            cv2.circle(img, end_point, 4, (0, 0, 255), -1, cv2.LINE_AA)

    # Show the inference time
    t, _ = net.getPerfProfile()
    elapsed_ms = t * 1000.0 / cv2.getTickFrequency()
    label = 'Inference time: %.2f ms' % (elapsed_ms)
    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 1, cv2.LINE_AA)

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()

'실습 note' 카테고리의 다른 글

bitcoin 예측 실습 (0)	2021.04.14
제조 공정 불량 검출 실습 (0)	2021.03.11
OpenCV_11(딥러닝) (0)	2021.03.04
OpenCV_10(머신러닝) (0)	2021.03.03
OpenCV_9(객체 추적과 모션벡터) (0)	2021.03.01

H_record

OpenCV_12(딥러닝2)

SSD 얼굴 검출

yolo_v3 객체 검출

Mask_rcnn

OpenPose

'실습 note' 카테고리의 다른 글

티스토리툴바

OpenCV_12(딥러닝2)

SSD 얼굴 검출

yolo_v3 객체 검출

Mask_rcnn

OpenPose

'실습 note' 카테고리의 다른 글

'실습 note' Related Articles

티스토리툴바