itcctv_soft

19/07/2025

Xây Dựng Mạng Nơ-ron Kết Nối Đầy Đủ (Fully Connected Neural Network - FCNN) Để Dự Đoán Giá Nhà

Chúng ta sẽ xây dựng một mạng nơ-ron kết nối đầy đủ (FCNN) để dự đoán giá nhà dựa trên các thông tin đầu vào như: diện tích nhà (m²), số phòng ngủ, số phòng tắm và vị trí. Mô hình là một mạng nơ-ron nhân tạo (Artificial Neural Network - ANN) gồm các lớp kết nối đầy đủ (Fully Connected Layers).

Bước 1: Nhập (import) các thư viện cần thiết

import torch

import torch.nn as nn

import torch.optim as optim

import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

Bước 2: chuẩn bị dữ liệu

# Step 2: prepare the data

# Small hand-written housing data set (ten samples).
data = {
    'Diện tích (m2)': [50, 60, 70, 80, 90, 100, 110, 120, 130, 140],
    'Số phòng ngủ': [1, 2, 2, 3, 3, 3, 4, 4, 4, 5],
    'Số phòng tắm': [1, 1, 2, 2, 2, 3, 3, 3, 4, 4],
    'Vị trí': [1, 2, 2, 3, 3, 3, 4, 4, 4, 5],  # location encoded as an integer
    'Giá nhà (triệu đồng)': [500, 600, 650, 700, 750, 800, 850, 900, 950, 1000],
}
df = pd.DataFrame(data)

# Features (X) are every column except the price; the price is the target (y),
# kept as a single-column 2-D array so it lines up with the network output.
X = df.drop('Giá nhà (triệu đồng)', axis=1).to_numpy()
y = df[['Giá nhà (triệu đồng)']].to_numpy()

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the inputs. The scaler is fitted on the training split only and
# then reused, unchanged, for the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert every array to a float32 PyTorch tensor.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train_scaled, y_train, X_test_scaled, y_test)
)

Bước 3: Xây Dựng Mô Hình FCNN với PyTorch

# Step 3: build the FCNN model with PyTorch

# Fully connected network definition
class FCNN(nn.Module):
    """Fully connected regression network: input -> 32 -> 16 -> 8 -> 1.

    Args:
        input_dim: Number of input features. When omitted, the width is taken
            from the module-level ``X_train`` array (``X_train.shape[1]``),
            which is how the original tutorial code sized the first layer.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # training matrix defined earlier in the script.
            input_dim = X_train.shape[1]
        self.fc1 = nn.Linear(input_dim, 32)  # hidden layer 1
        self.fc2 = nn.Linear(32, 16)  # hidden layer 2
        self.fc3 = nn.Linear(16, 8)  # hidden layer 3
        self.fc4 = nn.Linear(8, 1)  # output layer

    def forward(self, x):
        """Return the network output for ``x``; the last layer has no activation."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        # Regression: the output layer is left linear (no activation applied).
        x = self.fc4(x)
        return x

# Instantiate the network.
model = FCNN()

# Loss function (mean squared error) and optimizer (Adam, learning rate 0.01).
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

Bước 4: Huấn luyện mô hình

# Step 4: train the model

# Number of passes over the (full-batch) training set.
num_epochs = 500

# Training mode is selected once; nothing inside the loop switches it back.
model.train()

for epoch in range(num_epochs):
    # Forward pass: predict on the whole training set and compute the loss.
    y_pred = model(X_train_tensor)
    loss = criterion(y_pred, y_train_tensor)

    # Backward pass: clear old gradients, backpropagate, update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss every 50 epochs.
    if (epoch + 1) % 50 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Bước 5: Đánh giá mô hình

# Step 5: evaluate

# Score the trained model on the held-out test split.
model.eval()

# Gradient tracking is switched off for the forward pass and the loss.
with torch.no_grad():
    y_pred_test = model(X_test_tensor)
    test_loss = criterion(y_pred_test, y_test_tensor)

print(f"Loss trên tập kiểm tra: {test_loss.item():.4f}")

Bước 6: Dự đoán giá mới

# Step 6: predict

# New house to price: 100 m2, 3 bedrooms, 2 bathrooms, location code 3.
new_house = np.array([[100, 3, 2, 3]])

# Apply the same scaler that was fitted on the training data.
new_house_scaled = scaler.transform(new_house)
new_house_tensor = torch.tensor(new_house_scaled, dtype=torch.float32)

# Predict the price without tracking gradients.
with torch.no_grad():
    predicted_price = model(new_house_tensor).item()

print(f"Giá nhà dự đoán: {predicted_price:.2f} triệu đồng")

Kết quả của mô hình là:

A screenshot of a computer

AI-generated content may be incorrect.

Ảnh 4‑11: kết quả của mô hình FCNN dự đoán giá nhà

Nếu ta sử dụng phương pháp hồi quy tuyến tính:

import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error

# Synthetic data set (the same samples as in the FCNN example).
data = {
    'Diện tích (m2)': [50, 60, 70, 80, 90, 100, 110, 120, 130, 140],
    'Số phòng ngủ': [1, 2, 2, 3, 3, 3, 4, 4, 4, 5],
    'Số phòng tắm': [1, 1, 2, 2, 2, 3, 3, 3, 4, 4],
    'Vị trí': [1, 2, 2, 3, 3, 3, 4, 4, 4, 5],  # location encoded as an integer
    'Giá nhà (triệu đồng)': [500, 600, 650, 700, 750, 800, 850, 900, 950, 1000],
}
df = pd.DataFrame(data)

# Features (X) are every column except the price; the price is the target (y).
X = df.drop('Giá nhà (triệu đồng)', axis=1).to_numpy()
y = df['Giá nhà (triệu đồng)'].to_numpy()

# 80% training / 20% test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the inputs; the scaler is fitted on the training split only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and fit the linear regression model.
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict on the test split.
y_pred = model.predict(X_test_scaled)

# Mean squared error and its square root.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")

# Fitted coefficients and intercept.
print("Hệ số hồi quy:", model.coef_)
print("Bias:", model.intercept_)

# Price a new house: 100 m2, 3 bedrooms, 2 bathrooms, location code 3.
new_house = np.array([[100, 3, 2, 3]])
new_house_scaled = scaler.transform(new_house)
predicted_price = model.predict(new_house_scaled)
print(f"Giá nhà dự đoán: {predicted_price[0]:.2f} triệu đồng")

Ta có kết quả như sau:

A number on a white background

AI-generated content may be incorrect.

Ảnh 4‑12: Kết quả dự đoán giá nhà dựa trên phương pháp hồi quy tuyến tính

Từ hai kết quả trên, ta thấy giá dự đoán của hai phương pháp khá tương đồng. Hồi quy tuyến tính phù hợp để dự đoán giá nhà khi mối quan hệ giữa các đặc trưng đầu vào và giá nhà chủ yếu là tuyến tính; nếu dữ liệu có tính phi tuyến mạnh, mạng nơ-ron sẽ cho độ chính xác cao hơn. Hồi quy tuyến tính chạy nhanh hơn và dễ triển khai hơn, trong khi FCNN có khả năng học tốt hơn trên các tập dữ liệu lớn và phức tạp.

0 lượt chia sẻ