컴퓨터 비전 공부하기(3D Vision - 핀홀 카메라 모델 딥러닝)

카테고리 없음

by zmo 2024. 12. 22. 22:48

오늘은 핀홀 카메라 모델을 직접 사용해보고자 한다.

먼저 모델을 학습시키기 위한 데이터 셋은 아래의 깃허브에서 제공해 주시는 데이터를 사용하였다.

https://github.com/lmb-freiburg/orion/tree/master/datasets

orion/datasets at master · lmb-freiburg/orion

ORION: Orientation-boosted Voxel Nets for 3D Object Recognition - lmb-freiburg/orion

github.com

깃허브에서 제공하는 ModelNet40은 40개의 클래스(범주)로 구성된 3D 객체 데이터셋이다.

각 클래스에는 다양한 3D 모델들이 포함되어 있는데

예를 들어

chair(의자)

table(테이블)

airplane(비행기)

등이 있다.

들어가기 앞서 이런 3D 객체 데이터셋은 어떤 구조로 이루어져 있는지 파악하고 들어가자

데이터는 주로 OFF 파일 포맷(Object File Format)으로 저장된다. OFF 파일이란 3D 모델의 정점(Vertex)과 면(Face) 정보를 담고 있는 텍스트 기반 포맷이다. vertex는 스팀 등에서 게임을 자주 접한 사람이라면 많이 들어 봤을 법한 단어이기도 하다.

폴더 구조는 다음과 같이 구성되어 있다.

ModelNet40/
    ├── airplane/
    │   ├── train/
    │   │   ├── airplane_0001.off
    │   │   ├── airplane_0002.off
    │   │   └── ...
    │   └── test/
    │       ├── airplane_0100.off
    │       ├── airplane_0101.off
    │       └── ...
    ├── chair/
    ├── table/
    ├── sofa/
    └── ... (총 40개의 클래스)

각 클래스는 학습용과 테스트용 데이터가 분리되어 제공되고,

OFF 파일 포맷은 다음과 같이 이루어진다.

OFF
<정점 수> <면 수> <엣지 수>
x1 y1 z1
x2 y2 z2
x3 y3 z3
...
3 v1 v2 v3
3 v1 v2 v3
...

그러면 사전 준비는 끝났으니 본격적으로 코드를 작성해보자 (코드만 적고 해설은 주석으로 달아놓았다)

참고: 코랩을 사용하였다

오늘 알아보기

3D 객체 데이터셋 구조

핀홀 카메라 모델 코드

대망의 핀홀 카메라 모델 코드

!pip install trimesh matplotlib numpy

import os
import trimesh
import numpy as np
import matplotlib.pyplot as plt

# Dataset path
data_dir = "ModelNet40"
if not os.path.exists(data_dir):
    # Download the dataset archive and unpack it into the current directory.
    # NOTE: lines starting with `!` are IPython/Colab shell escapes, not plain Python.
    !wget http://modelnet.cs.princeton.edu/ModelNet40.zip
    !unzip ModelNet40.zip -d .

print(f"Data directory contents: {os.listdir(data_dir)}")

# OFF 파일 로드
import os
import trimesh
import numpy as np
import matplotlib.pyplot as plt

def load_off_file(file_path):
    """Load the OFF file at ``file_path`` and return the object produced by ``trimesh.load``."""
    return trimesh.load(file_path, file_type='off')

# Load one example file from the chair/train folder
sample_file = os.path.join(data_dir, "chair", "train", "chair_0001.off")
mesh = load_off_file(sample_file)

# Print the shapes of the mesh's vertex and face arrays
print(f"Vertices: {mesh.vertices.shape}")
print(f"Faces: {mesh.faces.shape}")

# Visualize the 3D mesh
mesh.show()

# Define the pinhole camera matrix (3x3, built from integer entries)
fx, fy = 800, 800  # focal lengths (presumably in pixels — confirm)
cx, cy = 320, 240  # principal point (presumably the centre of a 640x480 image — confirm)
K = np.array([
    [fx, 0, cx],
    [0, fy, cy],
    [0,  0,  1]
])

# 3D 점 투영
def project_points(K, points_3D):
    """
    Project 3D points onto the 2D image plane with the pinhole camera model.

    Args:
        K: Camera matrix. Either a 3x3 intrinsic matrix (applied directly to
            the 3D points) or a 3x4 projection matrix (applied to the points
            in homogeneous coordinates).
        points_3D: 3D points as an (N, 3) array-like.

    Returns:
        An (N, 2) float array with the projected 2D coordinates.

    Raises:
        ValueError: If ``points_3D`` is not (N, 3) or ``K`` is neither 3x3
            nor 3x4.
    """
    K = np.asarray(K, dtype=float)
    points_3D = np.asarray(points_3D, dtype=float)

    if points_3D.ndim != 2 or points_3D.shape[1] != 3:
        raise ValueError(
            f"points_3D must have shape (N, 3), got {points_3D.shape}"
        )

    if K.shape == (3, 3):
        # A 3x3 intrinsic matrix multiplies the (x, y, z) coordinates as-is.
        # Appending a homogeneous 1 here would give 4-vectors, which cannot
        # be multiplied by a 3x3 matrix.
        projected = points_3D @ K.T
    elif K.shape == (3, 4):
        # A full projection matrix expects homogeneous (x, y, z, 1) points.
        ones = np.ones((points_3D.shape[0], 1))
        projected = np.hstack((points_3D, ones)) @ K.T
    else:
        raise ValueError(f"K must have shape (3, 3) or (3, 4), got {K.shape}")

    # Perspective divide: homogeneous 2D -> Euclidean 2D.
    # Points with a zero third coordinate yield inf/nan, as NumPy division does.
    return projected[:, :2] / projected[:, 2:3]

import torch
import random

class ModelNet40Dataset(torch.utils.data.Dataset):
    """
    Dataset of OFF meshes laid out as ``<data_dir>/<class>/<split>/*.off``.

    Each item is a ``(vertices, class_name)`` pair, where ``vertices`` is a
    float32 tensor with exactly ``num_points`` rows and 3 columns.

    The base class is referenced as ``torch.utils.data.Dataset`` because only
    ``torch`` itself has been imported at this point in the notebook.
    """

    def __init__(self, data_dir, split='train', num_points=1024):
        """
        Args:
            data_dir: Root folder holding one sub-folder per class.
            split: Name of the sub-folder inside each class folder to read.
            num_points: Fixed number of points returned per sample.
        """
        self.data_dir = data_dir
        self.split = split
        self.num_points = num_points  # fixed number of points per sample
        # os.listdir returns entries in arbitrary order and also lists plain
        # files, so keep directories only and sort them for a stable order.
        self.classes = sorted(
            name for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))
        )
        self.files = []
        for cls in self.classes:
            class_dir = os.path.join(data_dir, cls, split)
            self.files += [
                (cls, os.path.join(class_dir, f))
                for f in sorted(os.listdir(class_dir))
                if f.endswith('.off')
            ]

    def __len__(self):
        """Return the number of .off files found for this split."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(vertices, class_name)``."""
        cls, file_path = self.files[idx]
        mesh = trimesh.load(file_path, file_type='off')
        vertices = torch.tensor(mesh.vertices, dtype=torch.float32)

        # Force a fixed number of points so samples can be batched
        if vertices.shape[0] > self.num_points:
            # Too many points: keep a random subset without replacement
            indices = random.sample(range(vertices.shape[0]), self.num_points)
            vertices = vertices[indices]
        elif vertices.shape[0] < self.num_points:
            # Too few points: pad with all-zero rows
            padding = torch.zeros((self.num_points - vertices.shape[0], 3))
            vertices = torch.cat([vertices, padding], dim=0)

        return vertices, cls

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import trimesh
import os
import random

# 데이터셋 클래스 정의
class ModelNet40Dataset(Dataset):
    """
    Dataset of OFF meshes laid out as ``<data_dir>/<class>/<split>/*.off``.

    Each item is a ``(vertices, label)`` pair, where ``vertices`` is a float32
    tensor with exactly ``num_points`` rows and 3 columns, and ``label`` is the
    integer index of the sample's class in ``self.classes``.
    """

    def __init__(self, data_dir, split='train', num_points=1024):
        """
        Args:
            data_dir: Root folder holding one sub-folder per class.
            split: Name of the sub-folder inside each class folder to read.
            num_points: Fixed number of points returned per sample.
        """
        self.data_dir = data_dir
        self.split = split
        self.num_points = num_points
        # os.listdir returns entries in arbitrary order and also lists plain
        # files. Sorting the class directories makes the class -> index
        # mapping identical for every dataset instance and every run.
        self.classes = sorted(
            name for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))
        )
        self.files = []
        for cls_idx, cls in enumerate(self.classes):
            class_dir = os.path.join(data_dir, cls, split)
            self.files += [
                (cls_idx, os.path.join(class_dir, f))
                for f in sorted(os.listdir(class_dir))
                if f.endswith('.off')
            ]

    def __len__(self):
        """Return the number of .off files found for this split."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(vertices, label)``."""
        cls, file_path = self.files[idx]
        mesh = trimesh.load(file_path, file_type='off')
        vertices = torch.tensor(mesh.vertices, dtype=torch.float32)

        # Force a fixed number of points so samples can be batched
        if vertices.shape[0] > self.num_points:
            # Too many points: keep a random subset without replacement
            indices = random.sample(range(vertices.shape[0]), self.num_points)
            vertices = vertices[indices]
        elif vertices.shape[0] < self.num_points:
            # Too few points: pad with all-zero rows
            padding = torch.zeros((self.num_points - vertices.shape[0], 3))
            vertices = torch.cat([vertices, padding], dim=0)

        return vertices, cls

# 간단한 모델 정의
class SimplePointNet(nn.Module):
    """
    Small point-cloud classifier: a shared per-point MLP, a max over the
    point axis, and a final linear layer producing class scores.
    """

    def __init__(self, num_classes=40):
        super().__init__()
        # Per-point feature layers: 3 -> 64 -> 128 -> 256
        self.fc1 = nn.Linear(3, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 256)
        # Classification head applied to the pooled 256-d feature
        self.fc4 = nn.Linear(256, num_classes)

    def forward(self, x):
        """
        Map a batch of point sets to class scores.

        ``x`` carries 3 values per point in its last dimension; dimension 1
        is reduced by the max, so it is treated as the point axis.
        """
        features = x
        for layer in (self.fc1, self.fc2, self.fc3):
            features = layer(features).relu()
        # Global max pooling over the point axis
        pooled = features.max(dim=1).values
        return self.fc4(pooled)

# Dataset path
data_dir = "ModelNet40"

# Build the datasets and data loaders (1024 points per sample, batches of 32)
train_dataset = ModelNet40Dataset(data_dir=data_dir, split='train', num_points=1024)
test_dataset = ModelNet40Dataset(data_dir=data_dir, split='test', num_points=1024)

# Only the training loader shuffles its samples
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Set up the model, loss function and optimizer
# Uses the GPU when CUDA is available, otherwise falls back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The number of output classes is taken from the class list found on disk
model = SimplePointNet(num_classes=len(train_dataset.classes)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 10
# Per-epoch history, read later when plotting
train_losses = []
test_losses = []
test_accuracies = []

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for points, labels in train_loader:
        # Move the batch to the same device as the model
        points, labels = points.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(points)
        loss = criterion(outputs, labels)
        
        # Backward pass: clear old gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    train_losses.append(running_loss / len(train_loader))

    # Evaluation loop on the test split
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    # No gradients are needed while evaluating
    with torch.no_grad():
        for points, labels in test_loader:
            points, labels = points.to(device), labels.to(device)
            outputs = model(points)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            
            # Accuracy: the predicted class is the index of the highest score
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Mean per-batch test loss and accuracy in percent for this epoch
    test_losses.append(test_loss / len(test_loader))
    test_accuracies.append(100 * correct / total)

    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}, Accuracy: {test_accuracies[-1]:.2f}%")

# Plot the recorded training/test curves side by side
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 6))
epoch_axis = range(1, epochs + 1)

# Left panel: loss per epoch
loss_ax.plot(epoch_axis, train_losses, label='Train Loss')
loss_ax.plot(epoch_axis, test_losses, label='Test Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Test Loss')
loss_ax.legend()

# Right panel: test accuracy per epoch
acc_ax.plot(epoch_axis, test_accuracies, label='Test Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Test Accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()

이 코드를 돌려 보고 나서 문제점이 있다는 것을 깨달았다. 코랩에서 모델을 학습시킬 때는 자신의 컴퓨터 사양과 코랩에서 기본으로 제공해 주는 자원을 잘 따져 보고 학습 장치(CPU/GPU)를 지정해 주도록 하자. 필자의 경우 GPU 성능이 좋은 편인데도 CPU로 학습하도록 지정하는 바람에 1시간 동안 5.75%밖에 진행되지 않았다. 빠르게 결과를 보고 싶다면 ModelNet40 대신 ModelNet10 데이터셋도 제공되고 있으므로, 상황에 맞게 골라서 공부해 보자.

Developerzmo

고정 헤더 영역

메뉴 레이어

메뉴 리스트

검색 레이어

검색 영역

상세 컨텐츠

본문 제목

본문

추가 정보

인기글

최신글

티스토리툴바