This code lets you write a math expression with your finger, sends it to a Gemini model to be solved, and displays the answer.
handTrackingModule.py
#Importing All the Required Libraries
import cv2
import mediapipe as mp

class handDetector():
    def __init__(self, mode=False, max_hands=1, model_complexity=1, min_det_conf=0.7, min_tracking_confidence=0.7):
        self.mode = mode
        self.max_hands = max_hands
        self.model_complexity = model_complexity
        self.min_det_conf = min_det_conf
        self.min_tracking_confidence = min_tracking_confidence
        self.mphands = mp.solutions.hands
        self.hands = self.mphands.Hands(self.mode, self.max_hands, self.model_complexity,
                                        self.min_det_conf, self.min_tracking_confidence)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
    # Hand detection: find hands in the frame and optionally draw the landmarks on them
    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB; OpenCV frames are BGR
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLMS in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLMS, self.mphands.HAND_CONNECTIONS)
        return img
    # Finger position tracking (distinguishes right and left hands)
    def findPosition(self, img, draw=True):
        allHands = []
        if self.results.multi_hand_landmarks:
            for handType, handLMS in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
                myHand = {}   # fresh dict and list per hand so detected hands don't share state
                lmList = []
                for id, lm in enumerate(handLMS.landmark):
                    h, w, c = img.shape
                    cx, cy, cz = int(lm.x * w), int(lm.y * h), int(lm.z * w)
                    lmList.append([id, cx, cy, cz])
                myHand["lmList"] = lmList
                # The webcam feed is mirrored, so MediaPipe's "Right" label is the user's left hand
                if handType.classification[0].label == "Right":
                    myHand["type"] = "Left"
                else:
                    myHand["type"] = "Right"
                allHands.append(myHand)
                if draw:
                    cv2.circle(img, (lmList[8][1], lmList[8][2]), 5, (255, 0, 0), cv2.FILLED)  # index fingertip
        return allHands, img
    # Detect which fingers are extended
    def fingersUp(self, myHand):
        """
        Finds how many fingers are open and returns them in a list.
        Considers left and right hands separately.
        :return: list of which fingers are up (1 = up, 0 = down)
        """
        fingers = []
        myHandType = myHand["type"]
        lm_list = myHand["lmList"]
        # Remove the landmark id from each sublist, keeping only (x, y, z)
        myLmList = [sublist[1:] for sublist in lm_list]
        if self.results.multi_hand_landmarks:
            # Thumb: compare the x of the tip (4) against the joint next to it (3)
            if myHandType == "Right":
                if myLmList[self.tipIds[0]][0] > myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if myLmList[self.tipIds[0]][0] < myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            # Other 4 fingers: a tip above its middle joint (smaller y) means the finger is up
            for id in range(1, 5):
                if myLmList[self.tipIds[id]][1] < myLmList[self.tipIds[id] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers
def main():
    #Create a Video Capture Object
    cap = cv2.VideoCapture(0)
    detector = handDetector()
    while True:
        ret, frame = cap.read()
        if ret:
            frame = detector.findHands(frame)
            allHands, img = detector.findPosition(frame)
            if allHands:
                hand1 = allHands[0]
                lmList = hand1["lmList"]
                hand_type = hand1["type"]  # renamed from "type", which shadows the builtin
                cv2.circle(frame, (lmList[4][1], lmList[4][2]), 5, (0, 255, 0), cv2.FILLED)  # thumb tip
                fingers = detector.fingersUp(hand1)
                print(fingers)
                print(f"H1 = {fingers.count(1)}", end="")
            cv2.imshow("Frame", frame)
            if cv2.waitKey(1) & 0xFF == ord('1'):
                break
        else:
            break
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
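For reference, fingersUp returns one flag per finger in the order [thumb, index, middle, ring, pinky]; math_gesture.py below keys all of its gestures off this list:

# [0, 1, 0, 0, 0]  index finger only  -> draw on the canvas
# [1, 0, 0, 0, 0]  thumb only         -> clear the canvas
# [0, 1, 1, 1, 1]  all but the thumb  -> send the drawing to Gemini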
math_gesture.py
To run it, enter this in the terminal:
streamlit run math_gesture.py
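The imports below assume these packages are installed; a typical setup (package names inferred from the imports, versions may differ):

pip install streamlit opencv-python mediapipe numpy pillow google-generativeai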
Getting a Google Gemini API key
Google AI Studio (aistudio.google.com) is the fastest way to start building with Gemini, Google's family of multimodal generative AI models; create your API key there.
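Rather than pasting the key into the source, a minimal sketch that reads it from an environment variable (the GOOGLE_API_KEY name is an assumption; export it however you prefer):

import os
import google.generativeai as genai

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])  # assumes the key was exported beforehand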
#Steps
#Run the Live Webcam Feed
#Draw the Hand Landmarks
#Create a Canvas
#Draw on the Canvas
#Send the Drawing to the AI Model
#Create a Streamlit Application
#---------------------------------
#Import All the Required Libraries
import cv2
import numpy as np
from PIL import Image
import google.generativeai as genai
import handTrackingModule as ht
# Streamlit: a Python-based web application framework
# that makes it easy to visualize data science / machine learning models and build interactive dashboards
import streamlit as st
st.set_page_config(page_title="Math with Gestures using AI", layout = "wide")
#st.title("Virtual Calculator")
# Custom CSS to style the UI for elegance
st.markdown("""
<style>
.main {
    background-color: #f5f5f5;
    padding: 10px;
}
h1 {
    margin-bottom: 0px;
}
.header {
    text-align: center;
    margin-top: -50px; /* Moved title up */
    padding-bottom: 20px;
}
.video-feed {
    margin-top: 0px;
}
</style>
""", unsafe_allow_html=True)
# Title of the Application moved higher with padding for elegance
st.markdown("<h1 class='header'>Virtual Calculator</h1>", unsafe_allow_html=True)
col1, col2 = st.columns([3, 2])
with col1:
    run = st.checkbox('Run', value=True)
    FRAME_WINDOW = st.image([], use_container_width=True)
with col2:
    st.header("Response from AI")
    output_text_area = st.subheader("")
genai.configure(api_key="YOUR_API_KEY")  # paste the key from Google AI Studio here
model = genai.GenerativeModel('gemini-1.5-flash')
#Create a Video Capture Object
cap = cv2.VideoCapture(0)
cap.set(3, 1280)  # frame width
cap.set(4, 800)   # frame height
detector = ht.handDetector()
canvas = None
prev_pos = None

def getHandInfo(frame):
    frame = detector.findHands(frame)
    hands, frame = detector.findPosition(frame, draw=True)
    if hands:
        hand = hands[0]
        lmList = hand["lmList"]
        fingers = detector.fingersUp(hand)
        return fingers, lmList
    else:
        return None
def draw(info, prev_pos, canvas):
    fingers, lmList = info
    current_pos = None
    if fingers == [0, 1, 0, 0, 0]:           # index finger only: draw
        current_pos = tuple(lmList[8][1:3])  # (x, y) of the index fingertip; a tuple keeps cv2.line happy
        if prev_pos is None:
            prev_pos = current_pos
        cv2.line(canvas, prev_pos, current_pos, (255, 0, 255), 10)
    elif fingers == [1, 0, 0, 0, 0]:         # thumb only: clear the canvas
        canvas = np.zeros_like(canvas)       # was zeros_like(frame), which leaned on a global; canvas has the same shape
    return current_pos, canvas
def sendtoAI(model, canvas, fingers):
    if fingers == [0, 1, 1, 1, 1]:           # all fingers but the thumb: submit the drawing
        # OpenCV images are BGR; convert to RGB before handing the canvas to PIL / Gemini
        pil_image = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
        response = model.generate_content(["Solve this math problem and provide a detailed answer", pil_image])
        return response.text
response = ""
while run:                           # the Run checkbox stops the loop when unchecked
    ret, frame = cap.read()
    if ret:
        frame = cv2.flip(frame, 1)   # mirror the frame; flip only after a successful read
        info = getHandInfo(frame)
        if canvas is None:
            canvas = np.zeros_like(frame)
        if info:
            fingers, lmList = info
            prev_pos, canvas = draw(info, prev_pos, canvas)
            response = sendtoAI(model, canvas, fingers)
            if response:
                print("The response from AI Model", response)
        frame_combined = cv2.addWeighted(frame, 0.7, canvas, 0.3, 0)
        FRAME_WINDOW.image(frame_combined, channels="BGR")
        if response:
            output_text_area.text(response)
        #cv2.imshow("Live Webcam", frame_combined)
        #cv2.imshow("Canvas", canvas)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
cap.release()
cv2.destroyAllWindows()
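One caveat: while the send gesture is held, sendtoAI fires on every frame, so the same drawing may be submitted to Gemini repeatedly. A minimal debounce sketch (the ai_sent flag is a hypothetical addition, not part of the original script); initialize ai_sent = False next to response = "", then inside the `if info:` block:

if fingers == [0, 1, 1, 1, 1] and not ai_sent:
    response = sendtoAI(model, canvas, fingers)
    ai_sent = True                   # hypothetical flag: this drawing has been submitted
elif fingers != [0, 1, 1, 1, 1]:
    ai_sent = False                  # gesture released: the next four-finger hold sends again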