
OpenCV Intermediate: Drawing with Your Finger

Olivia-BlackCherry 2025. 3. 5. 21:40


This post walks through code that lets you write a math expression on screen with your finger, then sends the drawing to the Gemini model, which solves it and returns the answer.

handTrackingModule.py

#Importing All the Required Libraries
import cv2
import mediapipe as mp


class handDetector():
    def __init__(self, mode = False, max_hands = 1, model_complexity = 1, min_det_conf = 0.7, min_tracking_confidence = 0.7):
        self.mode = mode
        self.max_hands = max_hands
        self.model_complexity = model_complexity
        self.min_det_conf = min_det_conf
        self.min_tracking_confidence = min_tracking_confidence
        self.mphands = mp.solutions.hands
        self.hands = self.mphands.Hands(self.mode, self.max_hands, self.model_complexity, self.min_det_conf, self.min_tracking_confidence)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
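        # (indices follow MediaPipe's 21-landmark hand model:
        #  4 = thumb tip, 8 = index tip, 12 = middle, 16 = ring, 20 = pinky)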

    # Hand detection: find hands in the frame and, if requested, draw the landmarks
    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLMS in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLMS, self.mphands.HAND_CONNECTIONS)
        return img

    # Track finger positions (distinguishing left and right hands)
    def findPosition(self, img, draw=True):
        allHands = []
        if self.results.multi_hand_landmarks:
            for handType, handLMS in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
                # fresh dict/list per hand so landmarks don't accumulate across hands
                myHand = {}
                lmList = []
                for id, lm in enumerate(handLMS.landmark):
                    h, w, c = img.shape
                    # landmark coords are normalized [0, 1]; scale to pixels
                    # (z is relative depth; scaling by width is an approximation)
                    cx, cy, cz = int(lm.x * w), int(lm.y * h), int(lm.z * w)
                    lmList.append([id, cx, cy, cz])
                myHand["lmList"] = lmList
                # MediaPipe's handedness assumes a mirrored (selfie) view,
                # so the raw label is swapped for an unflipped frame
                if handType.classification[0].label == "Right":
                    myHand["type"] = "Left"
                else:
                    myHand["type"] = "Right"
                allHands.append(myHand)
            if draw:
                cv2.circle(img, (lmList[8][1], lmList[8][2]), 5, (255, 0, 0), cv2.FILLED)  # mark the index fingertip
        return allHands, img

    # Detect which fingers are extended
    def fingersUp(self, myHand):
        """
        Finds which fingers are open and returns them as a list.
        Considers left and right hands separately.
        :return: List of which fingers are up
        """
        fingers = []
        myHandType = myHand["type"]
        lm_list = myHand["lmList"]
        # Drop the id from each sublist, keeping only [cx, cy, cz]
        myLmList = [sublist[1:] for sublist in lm_list]

        # Printing the updated list
        #print(myLmList)
        if self.results.multi_hand_landmarks:

            # Thumb: it folds sideways, so compare the x-coordinates of the tip and the joint next to it
            if myHandType == "Right":
                if myLmList[self.tipIds[0]][0] > myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if myLmList[self.tipIds[0]][0] < myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)

            # Other 4 fingers: up if the tip is above (smaller y than) the pip joint
            for id in range(1, 5):
                if myLmList[self.tipIds[id]][1] < myLmList[self.tipIds[id] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers

def main():
    #Create a Video Capture Object
    cap = cv2.VideoCapture(0)
    detector = handDetector()
    while True:
        ret, frame = cap.read()
        if ret:
            frame = detector.findHands(frame)
            allHands, img = detector.findPosition(frame)
            if allHands:
                #print(allHands)
                hand1 = allHands[0]
                lmList = hand1["lmList"]
                hand_type = hand1["type"]
                cv2.circle(frame, (lmList[4][1], lmList[4][2]), 5, (0, 255, 0), cv2.FILLED)
                fingers = detector.fingersUp(hand1)
                print(fingers)
                print(f"H1 = {fingers.count(1)}", end = "")
            cv2.imshow("Frame", frame)
            if cv2.waitKey(1) & 0xFF == ord('1'):  # press '1' to quit
                break
        else:
            break
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
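
Before wiring the module into the Streamlit app, you can sanity-check it on a single frame. This is a minimal sketch of my own (assuming a webcam at index 0), not part of the original post:

import cv2
import handTrackingModule as ht

detector = ht.handDetector()
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
cap.release()
if ret:
    frame = detector.findHands(frame, draw=False)
    hands, _ = detector.findPosition(frame, draw=False)
    if hands:
        # landmark 8 is the index fingertip
        print("Index fingertip (x, y):", hands[0]["lmList"][8][1:3])
        print("Fingers up:", detector.fingersUp(hands[0]))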

 

 

math_gesture.py

To run it, enter the following in the terminal:

streamlit run math_gesture.py
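
If the required packages are not installed yet, install them first (assuming the usual PyPI package names):

pip install streamlit opencv-python mediapipe google-generativeai pillow numpy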

 

Getting a Google Gemini API key

https://aistudio.google.com/

 

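Rather than hard-coding the key in the script, a safer option is to read it from an environment variable. A minimal sketch of my own, assuming you exported the key as GOOGLE_API_KEY:

import os
import google.generativeai as genai

# read the key from the environment instead of committing it to source control
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])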

 

#Steps
#Run the Live Webcam Feed
#Draw the Hand Landmarks
#Create a Canvas
#Draw on the Canvas
#Send the Drawing to the AI Model
#Create a Streamlit Application
#---------------------------------
#Import All the Required Libraries
import cv2
import numpy as np
from PIL import Image
import google.generativeai as genai
import handTrackingModule as ht
# Streamlit: a Python-based web application framework that makes it easy to
# visualize data-science/ML models and build interactive dashboards
import streamlit as st

st.set_page_config(page_title="Math with Gestures using AI", layout = "wide")
#st.title("Virtual Calculator")
# Custom CSS to style the UI for elegance
st.markdown("""
    <style>
    .main {
        background-color: #f5f5f5;
        padding: 10px;
    }
    h1 {
        margin-bottom: 0px;
    }
    .header {
        text-align: center;
        margin-top: -50px;  /* Moved title up */
        padding-bottom: 20px;
    }
    .video-feed {
        margin-top: 0px;
    }
    </style>
""", unsafe_allow_html=True)
# Title of the Application moved higher with padding for elegance
st.markdown("<h1 class='header'>Virtual Calculator</h1>", unsafe_allow_html=True)

col1, col2 = st.columns([3,2])

with col1:
    run = st.checkbox('Run', value = True)
    FRAME_WINDOW = st.image([], use_container_width=True)
with col2:
    st.header("Response from AI")
    output_text_area = st.subheader("")


genai.configure(api_key="YOUR_API_KEY")  # paste your Google AI Studio key here
model = genai.GenerativeModel('gemini-1.5-flash')

#Create a Video Capture Object
cap = cv2.VideoCapture(0)
cap.set(3, 1280)
cap.set(4, 800)
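# (property ids 3 and 4 are cv2.CAP_PROP_FRAME_WIDTH and cv2.CAP_PROP_FRAME_HEIGHT)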

detector = ht.handDetector()
canvas = None
prev_pos = None

def getHandInfo(frame):
    frame = detector.findHands(frame)
    hands, frame = detector.findPosition(frame, draw=True)
    if hands:
        hand = hands[0]
        lmList = hand["lmList"]
        fingers = detector.fingersUp(hand)
        return fingers, lmList
    else:
        return None

def draw(info, prev_pos, canvas):
    fingers, lmList = info
    current_pos = None
    if fingers == [0,1,0,0,0]:                  # index finger only: draw mode
        current_pos = tuple(lmList[8][1:3])     # index fingertip (x, y)
        if prev_pos is None:
            prev_pos = current_pos
        cv2.line(canvas, current_pos, prev_pos, (255, 0, 255), 10)
    elif fingers == [1,0,0,0,0]:                # thumb only: clear the canvas
        canvas = np.zeros_like(canvas)          # same shape as the frame, no global needed
    return current_pos, canvas

def sendtoAI(model, canvas, fingers):
    if fingers == [0,1,1,1,1]:                  # four fingers up (thumb down): send
        pil_image = Image.fromarray(canvas)
        response = model.generate_content(["Solve this math problem and provide a detailed answer", pil_image])
        return response.text
    return None
response = ""
while run:                                      # stops when the Run checkbox is unchecked
    ret, frame = cap.read()
    if ret:
        frame = cv2.flip(frame, 1)              # mirror the frame so drawing feels natural
        info = getHandInfo(frame)
        if canvas is None:
            canvas = np.zeros_like(frame)
        if info:
            fingers, lmList = info
            #print(fingers)
            #print(lmList)
            prev_pos, canvas = draw(info, prev_pos, canvas)
            result = sendtoAI(model, canvas, fingers)
            if result:                          # keep the last answer instead of overwriting it with None
                response = result
                print("The response from AI Model:", response)
        frame_combined = cv2.addWeighted(frame, 0.7, canvas, 0.3, 0)
        FRAME_WINDOW.image(frame_combined, channels = "BGR")
        if response:
            output_text_area.text(response)
        #cv2.imshow("Live Webcam", frame_combined)
        #cv2.imshow("Canvas", canvas)
        if cv2.waitKey(1) & 0xFF == ord('q'):   # only effective while a cv2 window is open
            break
    else:
        break
cap.release()
cv2.destroyAllWindows()
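
One caveat with the loop above: sendtoAI is called on every frame, so holding the four-finger gesture submits the same canvas to Gemini repeatedly. A small debounce helps; this is a sketch of my own (the already_sent flag is my addition, not in the original post):

def maybe_send(model, canvas, fingers, already_sent):
    """Send the canvas at most once per gesture. Returns (text_or_None, already_sent)."""
    if fingers == [0, 1, 1, 1, 1]:
        if already_sent:
            return None, True           # gesture still held: skip the duplicate request
        pil_image = Image.fromarray(canvas)
        response = model.generate_content(
            ["Solve this math problem and provide a detailed answer", pil_image])
        return response.text, True
    return None, False                  # gesture released: re-arm for the next send

In the main loop, initialize already_sent = False next to prev_pos and replace the sendtoAI call with: result, already_sent = maybe_send(model, canvas, fingers, already_sent).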