-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_person.py
220 lines (196 loc) · 11 KB
/
find_person.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#*****************************************************
# *
# Copyright 2018 Amazon.com, Inc. or its affiliates. *
# All Rights Reserved. *
# *
#*****************************************************
""" A sample lambda for object detection"""
from threading import Thread, Event, Timer
import os
import json
import numpy as np
import awscam
import cv2
import greengrasssdk
import time
import base64
from datetime import datetime, timedelta
from botocore.session import Session
# Setup the S3 client
session = Session()
s3 = session.create_client('s3')
s3_bucket = os.environ['BUCKET_NAME']
#rekognition_collection_id = os.environ['REKOGNITION_COLLECTION_ID']
#rekognition = session.create_client('rekognition')
class LocalDisplay(Thread):
""" Class for facilitating the local display of inference results
(as images). The class is designed to run on its own thread. In
particular the class dumps the inference results into a FIFO
located in the tmp directory (which lambda has access to). The
results can be rendered using mplayer by typing:
mplayer -demuxer lavf -lavfdopts format=mjpeg:probesize=32 /tmp/results.mjpeg
"""
def __init__(self, resolution):
""" resolution - Desired resolution of the project stream """
# Initialize the base class, so that the object can run on its own
# thread.
super(LocalDisplay, self).__init__()
# List of valid resolutions
RESOLUTION = {'1080p' : (1920, 1080), '720p' : (1280, 720), '480p' : (858, 480)}
if resolution not in RESOLUTION:
raise Exception("Invalid resolution")
self.resolution = RESOLUTION[resolution]
# Initialize the default image to be a white canvas. Clients
# will update the image when ready.
self.frame = cv2.imencode('.jpg', 255*np.ones([640, 480, 3]))[1]
self.stop_request = Event()
def run(self):
""" Overridden method that continually dumps images to the desired
FIFO file.
"""
# Path to the FIFO file. The lambda only has permissions to the tmp
# directory. Pointing to a FIFO file in another directory
# will cause the lambda to crash.
result_path = '/tmp/results.mjpeg'
# Create the FIFO file if it doesn't exist.
if not os.path.exists(result_path):
os.mkfifo(result_path)
# This call will block until a consumer is available
with open(result_path, 'w') as fifo_file:
while not self.stop_request.isSet():
try:
# Write the data to the FIFO file. This call will block
# meaning the code will come to a halt here until a consumer
# is available.
fifo_file.write(self.frame.tobytes())
except IOError:
continue
def set_frame_data(self, frame):
""" Method updates the image data. This currently encodes the
numpy array to jpg but can be modified to support other encodings.
frame - Numpy array containing the image data of the next frame
in the project stream.
"""
ret, jpeg = cv2.imencode('.jpg', cv2.resize(frame, self.resolution))
if not ret:
raise Exception('Failed to set frame data')
self.frame = jpeg
def join(self):
self.stop_request.set()
def greengrass_infinite_infer_run():
""" Entry point of the lambda function"""
try:
# This object detection model is implemented as single shot detector (ssd), since
# the number of labels is small we create a dictionary that will help us convert
# the machine labels to human readable labels.
model_type = 'ssd'
output_map = {1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', 5: 'bottle', 6: 'bus',
7 : 'car', 8 : 'cat', 9 : 'chair', 10 : 'cow', 11 : 'dinning table',
12 : 'dog', 13 : 'horse', 14 : 'motorbike', 15 : 'person',
16 : 'pottedplant', 17 : 'sheep', 18 : 'sofa', 19 : 'train',
20 : 'tvmonitor'}
# Create an IoT client for sending to messages to the cloud.
client = greengrasssdk.client('iot-data')
iot_topic = '$aws/things/{}/infer'.format(os.environ['AWS_IOT_THING_NAME'])
# Create a local display instance that will dump the image bytes to a FIFO
# file that the image can be rendered locally.
local_display = LocalDisplay('480p')
local_display.start()
# The sample projects come with optimized artifacts, hence only the artifact
# path is required.
model_path = '/opt/awscam/artifacts/mxnet_deploy_ssd_resnet50_300_FP16_FUSED.xml'
# Load the model onto the GPU.
client.publish(topic=iot_topic, payload='Loading object detection model')
model = awscam.Model(model_path, {'GPU': 1})
client.publish(topic=iot_topic, payload='Object detection model loaded')
# Set the threshold for detection
detection_threshold = 0.8
# The height and width of the training set images
input_height = 300
input_width = 300
lastmatch = datetime.utcnow()
# Do inference until the lambda is killed.
while True:
# Get a frame from the video stream
ret, frame = awscam.getLastFrame()
if not ret:
raise Exception('Failed to get frame from the stream')
# Resize frame to the same size as the training set.
frame_resize = cv2.resize(frame, (input_height, input_width))
# Run the images through the inference engine and parse the results using
# the parser API, note it is possible to get the output of doInference
# and do the parsing manually, but since it is a ssd model,
# a simple API is provided.
parsed_inference_results = model.parseResult(model_type,
model.doInference(frame_resize))
# Compute the scale in order to draw bounding boxes on the full resolution
# image.
yscale = float(frame.shape[0]/input_height)
xscale = float(frame.shape[1]/input_width)
# Dictionary to be filled with labels and probabilities for MQTT
cloud_output = {}
# Get the detected objects and probabilities
for obj in parsed_inference_results[model_type]:
if obj['prob'] > detection_threshold:
# Add bounding boxes to full resolution frame
xmin = int(xscale * obj['xmin']) \
+ int((obj['xmin'] - input_width/2) + input_width/2)
ymin = int(yscale * obj['ymin'])
xmax = int(xscale * obj['xmax']) \
+ int((obj['xmax'] - input_width/2) + input_width/2)
ymax = int(yscale * obj['ymax'])
# hold off for timespan between matches to let the system catch up
if datetime.utcnow() > (lastmatch + timedelta(seconds=1)):
try:
# if a person was found, upload the target area to S3 for further inspection
if output_map[obj['label']] == 'person':
# get the person image
person = frame[ymin:ymax, xmin:xmax]
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
gray = cv2.cvtColor(person, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.1, 5)
if len(faces) != 1:
client.publish(topic=iot_topic, payload="Skipping, no faces")
continue
#image = base64.b64encode(person)
#resp = client.search_faces_by_image(
# CollectionId=rekognition_collection_id,
# Image=image,
# MaxFaces=1,
# FaceMatchThreshold=70)
#if len(resp['FaceMatches']) > 0:
# client.publish(topic=iot_topic, payload=resp)
# create a nice s3 file key
s3_key = datetime.utcnow().strftime('%Y-%m-%d_%H_%M_%S.%f') + '.jpg'
encode_param=[int(cv2.IMWRITE_JPEG_QUALITY), 90] # 90% should be more than enough
_, jpg_data = cv2.imencode('.jpg', person, encode_param)
filename = "incoming/%s" % s3_key # the guess lambda function is listening here
response = s3.put_object(ACL='public-read', Body=jpg_data.tostring(),Bucket=s3_bucket,Key=filename)
# reset the timer for the next match
lastmatch = datetime.utcnow()
except Exception as ex:
client.publish(topic=iot_topic, payload='Error in person finder to S3 block: {}'.format(ex))
# See https://docs.opencv.org/3.4.1/d6/d6e/group__imgproc__draw.html
# for more information about the cv2.rectangle method.
# Method signature: image, point1, point2, color, and tickness.
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 165, 20), 10)
# Amount to offset the label/probability text above the bounding box.
text_offset = 15
# See https://docs.opencv.org/3.4.1/d6/d6e/group__imgproc__draw.html
# for more information about the cv2.putText method.
# Method signature: image, text, origin, font face, font scale, color,
# and tickness
cv2.putText(frame, "{}: {:.2f}%".format(output_map[obj['label']],
obj['prob'] * 100),
(xmin, ymin-text_offset),
cv2.FONT_HERSHEY_SIMPLEX, 2.5, (255, 165, 20), 6)
# Store label and probability to send to cloud
cloud_output[output_map[obj['label']]] = obj['prob']
# Set the next frame in the local display stream.
local_display.set_frame_data(frame)
# Send results to the cloud
if cloud_output != {}:
client.publish(topic=iot_topic, payload=json.dumps(cloud_output))
except Exception as ex:
client.publish(topic=iot_topic, payload='Error in object detection lambda: {}'.format(ex))
greengrass_infinite_infer_run()