2_naturalScenes

· Edwin.Liang

Data Source

The data come from the Kaggle Intel Image Classification dataset: square RGB images of shape [3,150,150] (three channels, 150 pixels on a side). The task is to decide which of six categories each image belongs to:

{'buildings' -> 0,
 'forest' -> 1,
 'glacier' -> 2,
 'mountain' -> 3,
 'sea' -> 4,
 'street' -> 5}

Data Analysis

Since the data are images, the first step is to check whether their size and format meet the expected specification.

Histogram of training-image formats

  • The training set contains 14,034 images. Every image has the required three RGB channels and the required width; only the height occasionally falls short of 150 pixels. There are 39 such images, a small share, so instead of resizing they are simply deleted.

Histogram of test-image formats

The test set contains 3,000 images. As with the training set, only the image height is ever out of spec; there are 7 such images, a small share, and they are deleted as well.
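
For reference, a minimal sketch of such a format check (it assumes the same seg_train directory layout used in the code below; the deletion step is left commented out):

import os
from PIL import Image

root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_train/seg_train/'

bad_files = []
for label in os.listdir(root_path):
    for name in os.listdir(root_path + label + '/'):
        path = root_path + label + '/' + name
        with Image.open(path) as img:
            width, height = img.size
            # flag anything that is not a 150x150 RGB image
            if img.mode != 'RGB' or width != 150 or height != 150:
                bad_files.append(path)

print('{} problematic images'.format(len(bad_files)))
# for path in bad_files:
#     os.remove(path)  # delete rather than resize, as described above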

20240621

Feature Engineering

No feature engineering is applied at this stage.

Model Selection

  • Model: the most basic $CNN$, a $LeNet$-style network, serves as the test model: the convolutional part has 3 conv-pool blocks, the fully connected part has 7 layers, and $BATCHSIZE$ is set to 128

  • Optimizer: $Adadelta$

  • Loss function: $CrossEntropyLoss$

Full Code

import sys
sys.path.append('/home/kureisersen/Documents/python3/ai/')
from SOURCE import *


class predData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_pred/seg_pred/'
        self.file_dir = os.listdir(self.root_path)

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor


# {'buildings' -> 0,
# 'forest' -> 1,
# 'glacier' -> 2,
# 'mountain' -> 3,
# 'sea' -> 4,
# 'street' -> 5 }

class trainData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_train/seg_train/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[idx].split("/")[0]],dtype=torch.long)


class testData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_test/seg_test/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[idx].split("/")[0]],dtype=torch.long)



class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()

        self.n_hidden = 20
        self.conv_layer = torch.nn.Sequential(
            # input [batch_size,3,150,150]
            torch.nn.Conv2d(3, 16, 3, stride=1, padding=1), # output [batch_size,16,150,150]
            torch.nn.MaxPool2d(kernel_size=5, stride=5, padding=0), # output [batch_size,16,30,30]
            torch.nn.Conv2d(16, 32, 3, stride=1, padding=1), # output [batch_size,32,30,30]
            torch.nn.MaxPool2d(kernel_size=3, stride=3, padding=0), # output [batch_size,32,10,10]
            torch.nn.Conv2d(32, 32, 3, stride=1, padding=1), # output [batch_size,32,10,10]
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0), # output [batch_size,32,5,5]
        )
        self.full_layer = torch.nn.Sequential(
            torch.nn.Linear(32*5*5,self.n_hidden), # 1
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 2
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 3
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 4
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 5
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 6
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,6)
        )


    def forward(self, input):
        out = self.conv_layer(input)
        out = out.view(input.size(0),-1)
        out = self.full_layer(out)
        return out


        

if __name__ == '__main__':
    BATCH_SIZE = 128


    # pred_loader = DataLoader(dataset=predData(), batch_size=BATCH_SIZE,drop_last=True)
    train_loader = DataLoader(dataset=trainData(), batch_size=BATCH_SIZE, shuffle=True,drop_last=True,num_workers=28,pin_memory=True)
    # test_loader = DataLoader(dataset=testData(), batch_size=BATCH_SIZE,drop_last=True)



    net = Net().cuda()

    # train model
    lossFun = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adadelta(net.parameters())

    for i in range(2000):
        loss_sum = 0
        accuracy = 0
        print('-------- epoch {} --------'.format(i))

        for idx,(data,label) in enumerate(train_loader):
            data = data.cuda()
            label = label.cuda()

            y_pred = net(data)
            label = torch.reshape(label,(1,BATCH_SIZE))[0]
            # print(y_pred,label)

            loss = lossFun(y_pred,label)

            # print(y_pred)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE
            # if(idx%100==0):
            #     print("batch {}, loss {}".format(idx,loss.item()))

        print('epoch {} -- mean loss {} -- mean accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader)))

Training Results

Accuracy stabilizes at 99% on the training set but only 75% on the test set: the model is clearly overfitting. The iterations below target this problem.

20240625

Improvement Ideas

Overfitting is a common problem, and the usual remedies include:

  1. Shrink the model
  2. Add BN (batch normalization) layers
  3. Add $Dropout$ in the fully connected layers
  4. Add more training data
  5. Regularization

Each of these was tried; the observed effects are described below, and a wiring sketch of BN, Dropout, and L2 regularization follows the list.

  1. Shrinking the model
    • The overfitting signature did disappear, but overall accuracy got worse, and the gap between training and test accuracy remained; this approach is not a good fit here.
  2. Adding $Dropout$ to the fully connected layers
    • At the same model size, training becomes slower and needs more epochs to reach the same level, yet the overfitting persists. The method also generalizes poorly, and most recent models have abandoned it.
  3. Adding BN layers
    • Some effect: the model reaches fairly high accuracy within the first few epochs, but its accuracy after the plateau ends up lower.
  4. Adding more training data
    • For image recognition, geometric transforms (rotating images by various angles, etc.) can enlarge the training set; in practice this is the most workable option here, and its actual effect is evaluated below.
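
For concreteness, here is a minimal sketch of how BN, $Dropout$, and L2 regularization (weight decay) could be wired into the LeNet-style network above; the layer positions, dropout rate, and weight-decay value are illustrative assumptions, not the exact configurations that were benchmarked:

import torch

n_hidden = 20

# remedy: BatchNorm after the convolution, before the nonlinearity
conv_block = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3, stride=1, padding=1),
    torch.nn.BatchNorm2d(16),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=5, stride=5),  # [batch_size,16,30,30]
)

# remedy: Dropout between the fully connected layers
full_block = torch.nn.Sequential(
    torch.nn.Linear(16 * 30 * 30, n_hidden),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(n_hidden, 6),
)

net = torch.nn.Sequential(conv_block, torch.nn.Flatten(), full_block)

# remedy: L2 regularization via the optimizer's weight_decay
optimizer = torch.optim.Adadelta(net.parameters(), weight_decay=1e-4)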

Feature Engineering

  • Use $transforms$ to randomly flip images horizontally and randomly rotate them, thereby enlarging the training data

Model Selection

  • Model: the most basic $CNN$, a $LeNet$-style network, serves as the test model: the convolutional part has 3 conv-pool blocks, the fully connected part has 7 layers, and $BATCHSIZE$ is set to 128

  • Optimizer: $Adadelta$

  • Loss function: $CrossEntropyLoss$

Full Code

import sys
sys.path.append('/home/kureisersen/Documents/python3/ai/')
from SOURCE import *


class predData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_pred/seg_pred/'
        self.file_dir = os.listdir(self.root_path)

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor


# {'buildings' -> 0,
# 'forest' -> 1,
# 'glacier' -> 2,
# 'mountain' -> 3,
# 'sea' -> 4,
# 'street' -> 5 }

class trainData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_train/seg_train/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)*5

    def __getitem__(self, idx):
        integer = idx // 5  # each source image yields 5 augmented samples per epoch
        # remainder = idx % 5

        image_path = self.root_path + self.file_dir[integer]
        img_jpg = Image.open(image_path).convert('RGB') 


        transforms_comp = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(60),
            transforms.ToTensor(),
        ])

        img_tensor = transforms_comp(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[integer].split("/")[0]],dtype=torch.long)


class testData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_test/seg_test/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[idx].split("/")[0]],dtype=torch.long)



class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()

        self.n_hidden = 20
        self.conv_layer = torch.nn.Sequential(
            # input [batch_size,3,150,150]
            torch.nn.Conv2d(3, 16, 3, stride=1, padding=1), # output [batch_size,16,150,150]
            torch.nn.MaxPool2d(kernel_size=5, stride=5, padding=0), # output [batch_size,16,30,30]
            torch.nn.Conv2d(16, 32, 3, stride=1, padding=1), # output [batch_size,32,30,30]
            torch.nn.MaxPool2d(kernel_size=3, stride=3, padding=0), # output [batch_size,32,10,10]
            torch.nn.Conv2d(32, 32, 3, stride=1, padding=1), # output [batch_size,32,10,10]
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0), # output [batch_size,32,5,5]
        )
        self.full_layer = torch.nn.Sequential(
            torch.nn.Linear(32*5*5,self.n_hidden), # 1
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 2
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 3
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 4
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 5
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,self.n_hidden), # 6
            torch.nn.ReLU(),
            torch.nn.Linear(self.n_hidden,6)
        )


    def forward(self, input):
        out = self.conv_layer(input)
        out = out.view(input.size(0),-1)
        out = self.full_layer(out)
        return out


        

if __name__ == '__main__':
    BATCH_SIZE = 128


    # pred_loader = DataLoader(dataset=predData(), batch_size=BATCH_SIZE,drop_last=True)
    train_loader = DataLoader(dataset=trainData(), batch_size=BATCH_SIZE, shuffle=True,drop_last=True,num_workers=28,pin_memory=True)
    valid_loader = DataLoader(dataset=testData(), batch_size=BATCH_SIZE, shuffle=True, drop_last=True,num_workers=28,pin_memory=True)



    net = Net().cuda()

    # train model
    lossFun = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adadelta(net.parameters())

    for i in range(2000):
        loss_sum = 0
        accuracy = 0
        print('-------- epoch {} --------'.format(i))

        net.train()
        for idx,(data,label) in enumerate(train_loader):
            data = data.cuda()
            label = label.cuda()

            y_pred = net(data)
            label = torch.reshape(label,(1,BATCH_SIZE))[0]
            # print(y_pred,label)

            loss = lossFun(y_pred,label)

            # print(y_pred)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE
            # if(idx%100==0):
            #     print("batch {}, loss {}".format(idx,loss.item()))

        # print('epoch {} -- mean loss {} -- mean accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader)))

        valid_loss_sum = 0
        valid_accuracy = 0
        net.eval()  # switch to inference mode for validation
        with torch.no_grad():
            for idx,(data,label) in enumerate(valid_loader):
                data = data.cuda()
                label = label.cuda()

                y_pred = net(data)
                label = torch.reshape(label,(1,BATCH_SIZE))[0]
                loss = lossFun(y_pred,label)

                valid_loss_sum += loss.item()
                valid_accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE

        print('epoch {} -- train loss {} -- train accuracy {} -- valid loss {} -- valid accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader),valid_loss_sum/len(valid_loader),valid_accuracy/len(valid_loader)))

Training Results

Training accuracy now stabilizes at 86% and test accuracy at 83%. The overfitting has eased noticeably, but overall accuracy is still low.

20240629

Improvement Ideas

Accuracy is still low; the hope is that a more complex model will fix this, so the network is switched from $LeNet$ to $AlexNet$ to see the effect.

Feature Engineering

Unchanged: $transforms$ still randomly flips and randomly rotates the images to enlarge the training data.

Model Selection

  • Model: a $CNN$ in the $AlexNet$ style serves as the test model. The convolutional part has 4 conv-$ReLU$-conv-$ReLU$-pool blocks; since the broad trend in CNN design is to cut down fully connected layers and gradually replace them with convolutional ones, the fully connected part is reduced to 3 layers. $BATCHSIZE$ is set to 128

  • Optimizer: $Adadelta$

  • Loss function: $CrossEntropyLoss$

Full Code

import sys
sys.path.append('/home/kureisersen/Documents/python3/ai/')
from SOURCE import *


class predData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_pred/seg_pred/'
        self.file_dir = os.listdir(self.root_path)

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor


# {'buildings' -> 0,
# 'forest' -> 1,
# 'glacier' -> 2,
# 'mountain' -> 3,
# 'sea' -> 4,
# 'street' -> 5 }

class trainData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_train/seg_train/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)*5

    def __getitem__(self, idx):
        integer = idx // 5  # each source image yields 5 augmented samples per epoch
        # remainder = idx % 5

        image_path = self.root_path + self.file_dir[integer]
        img_jpg = Image.open(image_path).convert('RGB') 


        transforms_comp = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(60),
            transforms.ToTensor(),
        ])

        img_tensor = transforms_comp(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[integer].split("/")[0]],dtype=torch.long)


class testData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_test/seg_test/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[idx].split("/")[0]],dtype=torch.long)



class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()

        self.n_hidden = 2000
        self.conv_layer = torch.nn.Sequential(
            # input [batch_size,3,150,150]
            torch.nn.Conv2d(3, 64, 3, stride=1, padding=1), # output [batch_size,64,150,150]
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, 3, stride=1, padding=1), # output [batch_size,64,150,150]
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=5, stride=5, padding=0), # output [batch_size,64,30,30]
            # torch.nn.ReLU(),
            # torch.nn.BatchNorm2d(64),

            torch.nn.Conv2d(64, 128, 3, stride=1, padding=1), # output [batch_size,128,30,30]
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 128, 3, stride=1, padding=1), # output [batch_size,128,30,30]
            torch.nn.ReLU(),
            # torch.nn.Conv2d(128, 128, 3, stride=1, padding=1),
            # torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=3, stride=3, padding=0), # output [batch_size,128,10,10]
            # torch.nn.ReLU(),
            # torch.nn.BatchNorm2d(128),

            torch.nn.Conv2d(128, 256, 3, stride=1, padding=1), # output [batch_size,256,10,10]
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 256, 3, stride=1, padding=1), # output [batch_size,256,10,10]
            torch.nn.ReLU(),
            # torch.nn.Conv2d(256, 256, 3, stride=1, padding=1),
            # torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0), # output [batch_size,256,5,5]


            torch.nn.Conv2d(256, 512, 3, stride=1, padding=1), # output [batch_size,512,5,5]
            torch.nn.ReLU(),
            torch.nn.Conv2d(512, 512, 3, stride=1, padding=1), # output [batch_size,512,5,5]
            torch.nn.ReLU(),
            # torch.nn.Conv2d(512, 512, 3, stride=1, padding=1),
            # torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=5, stride=1, padding=0), # output [batch_size,512,1,1]
        )
        self.full_layer = torch.nn.Sequential(
            torch.nn.Linear(512*1*1,self.n_hidden), # 1
            torch.nn.ReLU(),

            torch.nn.Linear(self.n_hidden,self.n_hidden), # 2
            torch.nn.ReLU(),
            # torch.nn.Linear(self.n_hidden,self.n_hidden), # 3
            # torch.nn.ReLU(),
            # torch.nn.Linear(self.n_hidden,self.n_hidden), # 4
            # torch.nn.ReLU(),
            # torch.nn.Dropout(0.5),
            torch.nn.Linear(self.n_hidden,6)
        )


    def forward(self, input):
        out = self.conv_layer(input)
        out = out.view(input.size(0),-1)
        out = self.full_layer(out)
        return out


        

if __name__ == '__main__':
    BATCH_SIZE = 128


    # pred_loader = DataLoader(dataset=predData(), batch_size=BATCH_SIZE,drop_last=True)
    train_loader = DataLoader(dataset=trainData(), batch_size=BATCH_SIZE, shuffle=True,drop_last=True,num_workers=28,pin_memory=True)
    valid_loader = DataLoader(dataset=testData(), batch_size=BATCH_SIZE, shuffle=True, drop_last=True,num_workers=28,pin_memory=True)



    net = Net().cuda()

    # train model
    lossFun = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adadelta(net.parameters())

    for i in range(2000):
        loss_sum = 0
        accuracy = 0
        print('-------- epoch {} --------'.format(i))

        net.train()
        for idx,(data,label) in enumerate(train_loader):
            data = data.cuda()
            label = label.cuda()

            y_pred = net(data)
            label = torch.reshape(label,(1,BATCH_SIZE))[0]
            # print(y_pred,label)

            loss = lossFun(y_pred,label)

            # print(y_pred)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE
            # if(idx%100==0):
            #     print("batch {}, loss {}".format(idx,loss.item()))

        # print('epoch {} -- mean loss {} -- mean accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader)))

        valid_loss_sum = 0
        valid_accuracy = 0
        net.eval()  # switch to inference mode for validation
        with torch.no_grad():
            for idx,(data,label) in enumerate(valid_loader):
                data = data.cuda()
                label = label.cuda()

                y_pred = net(data)
                label = torch.reshape(label,(1,BATCH_SIZE))[0]
                loss = lossFun(y_pred,label)

                valid_loss_sum += loss.item()
                valid_accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE

        print('epoch {} -- train loss {} -- train accuracy {} -- valid loss {} -- valid accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader),valid_loss_sum/len(valid_loader),valid_accuracy/len(valid_loader)))

Training Results

Training accuracy stabilizes at 93% and test accuracy at 86%; there is still room for improvement.

20240713

Improvement Ideas

Accuracy is still not high enough; again the hope is that a more complex model will help, so $AlexNet$ is swapped for $GoogLeNet$ to see the effect.

Feature Engineering

The earlier random rotation turned out to reduce accuracy, so the augmentation is changed from random rotation to random horizontal flips plus random resized crops (random zoom).

Model Selection

  • Model: a $CNN$ in the $GoogLeNet$ style serves as the test model, with 5 $Inception$ blocks in the convolutional part. Adding more $Inception$ blocks ran into vanishing gradients; switching the model to $ResNet$ later should address that. The fully connected part is reduced to a single layer. $BATCHSIZE$ is set to 128

  • Optimizer: $Adadelta$

  • Loss function: $CrossEntropyLoss$

Full Code

import sys
sys.path.append('/home/kureisersen/Documents/python3/ai/')
from SOURCE import *



class trainData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_train/seg_train/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)*5

    def __getitem__(self, idx):
        integer = idx // 5  # each source image yields 5 augmented samples per epoch
        # remainder = idx % 5

        image_path = self.root_path + self.file_dir[integer]
        img_jpg = Image.open(image_path).convert('RGB') 


        transforms_comp = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomResizedCrop(size=150, scale=(0.08, 1)),
            # transforms.RandomRotation(60),
            transforms.ToTensor(),
        ])

        img_tensor = transforms_comp(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[integer].split("/")[0]],dtype=torch.long)


class testData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_test/seg_test/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[idx].split("/")[0]],dtype=torch.long)



class ConvRelu(torch.nn.Module):
    def __init__(self,inTunnel, outTunnel, kernelSize, **kwargs):
        super(ConvRelu,self).__init__()

        self.conv_layer = torch.nn.Sequential(
            torch.nn.Conv2d(inTunnel, outTunnel, kernelSize, **kwargs),
            torch.nn.ReLU(),
        )
    
    def forward(self, input):
        out = self.conv_layer(input)
        return out



class Inception(torch.nn.Module):
    def __init__(self,inTunnel,FirstOutTunnel,SecOutTunnel,ThirOutTunnel,ForthOutTunnel):
        super(Inception,self).__init__()

        # branch 1: 1x1 convolution
        self.block1 = torch.nn.Sequential(
            ConvRelu(inTunnel,FirstOutTunnel,1,stride=1),
        )

        # branch 2: 1x1 reduction, then 3x3 convolution
        self.block2 = torch.nn.Sequential(
            ConvRelu(inTunnel,int(SecOutTunnel/2),1,stride=1),
            ConvRelu(int(SecOutTunnel/2),SecOutTunnel,3,stride=1,padding=1),
        )

        # branch 3: 1x1 reduction, then 5x5 convolution
        self.block3 = torch.nn.Sequential(
            ConvRelu(inTunnel,int(ThirOutTunnel/2),1,stride=1),
            ConvRelu(int(ThirOutTunnel/2),ThirOutTunnel,5,stride=1,padding=2),
        )

        # branch 4: 3x3 max-pool, then 1x1 convolution
        self.block4 = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=3, stride=1, padding=1), 
            ConvRelu(inTunnel,ForthOutTunnel,1,stride=1),
        )

    def forward(self, input):
        out1 = self.block1(input)
        out2 = self.block2(input)
        out3 = self.block3(input)
        out4 = self.block4(input)

        out = torch.cat((out1,out2,out3,out4),1)
        return out








class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()

        self.conv_layer = torch.nn.Sequential(
            # Conv-Maxpool (LocalRespNorm omitted)
            ConvRelu(3, 64, 7, stride=1, padding=3),   # [batch_size, 64, 150, 150]
            torch.nn.MaxPool2d(kernel_size=3, stride=3, padding=0),       # [batch_size, 64, 50, 50]

            # Conv-Conv-Maxpool (LocalRespNorm omitted)
            ConvRelu(64, 64, 1),
            ConvRelu(64, 128, 3, stride=1, padding=1), # [batch_size, 128, 50, 50]
            torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # [batch_size, 128, 25, 25]

            # Inception(3a)
            Inception(128, 64, 96, 64, 32),     # [batch_size, 256, 25, 25]
            # Inception(3b)
            Inception(256, 128, 192, 128, 64),   # [batch_size, 512, 25, 25]

            torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # [batch_size, 512, 13, 13]

            # Inception(4a)
            Inception(512, 256, 384, 256, 128),    # [batch_size, 1024, 13, 13]
            # Inception(4b)
            Inception(1024, 256, 384, 256, 128),   # [batch_size, 1024, 13, 13]
            # # Inception(4c)
            # Inception(512, 128, 192, 128, 64),
            # # Inception(4d)
            # Inception(512, 128, 192, 128, 64),
            # # Inception(4e)
            # Inception(528, 256, 160, 320, 32, 128, 128),

            torch.nn.MaxPool2d(kernel_size=3, stride=3, padding=1),       # [batch_size, 1024, 5, 5]

            # Inception(5a)
            Inception(1024, 256, 384, 256, 128), # [batch_size, 1024, 5, 5]
            # # Inception(5b)
            # Inception(1024, 256, 384, 256, 128),

            torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # [batch_size, 1024, 3, 3]

            torch.nn.AdaptiveAvgPool2d((1,1)),  # [batch_size, 1024, 1, 1]
        )
        self.full_layer = torch.nn.Sequential(
            torch.nn.Linear(1024,6), # 1
            # torch.nn.ReLU(),
            # torch.nn.Linear(self.n_hidden,6)
        )


    def forward(self, input):
        out = self.conv_layer(input)
        out = out.view(input.size(0),-1)
        out = self.full_layer(out)
        return out

def printNet():
    net = Net()
    summary(net, (128, 3, 150, 150)) 

def startTrain():
    BATCH_SIZE = 128


    train_loader = DataLoader(dataset=trainData(), batch_size=BATCH_SIZE, shuffle=True,drop_last=True,num_workers=28,pin_memory=True)
    valid_loader = DataLoader(dataset=testData(), batch_size=BATCH_SIZE, shuffle=True, drop_last=True,num_workers=28,pin_memory=True)



    net = Net().cuda()

    lossFun = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adadelta(net.parameters())

    for i in range(2000):
        loss_sum = 0
        accuracy = 0
        print('-------- epoch {} --------'.format(i))

        net.train()
        for idx,(data,label) in enumerate(train_loader):
            data = data.cuda()
            label = label.cuda()

            y_pred = net(data)
            label = torch.reshape(label,(1,BATCH_SIZE))[0]

            loss = lossFun(y_pred,label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE

        valid_loss_sum = 0
        valid_accuracy = 0
        net.eval()  # switch to inference mode for validation
        with torch.no_grad():
            for idx,(data,label) in enumerate(valid_loader):
                data = data.cuda()
                label = label.cuda()

                y_pred = net(data)
                label = torch.reshape(label,(1,BATCH_SIZE))[0]
                loss = lossFun(y_pred,label)

                valid_loss_sum += loss.item()
                valid_accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE

        print('epoch {} -- train loss {} -- train accuracy {} -- valid loss {} -- valid accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader),valid_loss_sum/len(valid_loader),valid_accuracy/len(valid_loader)))

if __name__ == '__main__':
    startTrain()
    # printNet()
    

Training Results

Training accuracy stabilizes at 91% and validation accuracy at 89%. Validation accuracy rose again, and the gap between training and validation shrank; this change is probably due mostly to the new data augmentation, and it suggests that from here on, raising training accuracy is all that is needed. Extended training also ran into vanishing gradients, which calls for a change of model.

20240719

Improvement Ideas

The goal is to push training and test accuracy further, but simply deepening the network stopped helping, so $GoogLeNet$ is replaced with $ResNet$ to see the effect.

Feature Engineering

As before, the augmentation is random horizontal flips plus random resized crops; random rotation is avoided because it reduced accuracy.

Model Selection

  • Model: a $CNN$ in the $ResNet$ style serves as the test model, with 5 residual $Block$s in the convolutional part and a single fully connected layer. $BATCHSIZE$ is set to 128

  • Optimizer: $Adadelta$

  • Loss function: $CrossEntropyLoss$

Full Code

import sys
sys.path.append('/home/kureisersen/Documents/python3/ai/')
from SOURCE import *



class trainData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_train/seg_train/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)*5

    def __getitem__(self, idx):
        integer = idx // 5  # each source image yields 5 augmented samples per epoch
        # remainder = idx % 5

        image_path = self.root_path + self.file_dir[integer]
        img_jpg = Image.open(image_path).convert('RGB') 


        transforms_comp = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomResizedCrop(size=150, scale=(0.08, 1)),
            # transforms.RandomRotation(60),
            transforms.ToTensor(),
        ])

        img_tensor = transforms_comp(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[integer].split("/")[0]],dtype=torch.long)


class testData(Dataset):
    def __init__(self):
        self.root_path = '/home/kureisersen/Documents/python3/ai/data/dateDir/natural_scenes/seg_test/seg_test/'
        self.item = {'buildings':0,'forest':1,'glacier':2,'mountain':3,'sea':4,'street':5}
        self.file_dir = []
        for key, value in self.item.items():
            file_arr = os.listdir(self.root_path + key + '/')
            file_arr = [key + '/' + s for s in file_arr]
            self.file_dir += file_arr

    def __len__(self):
        return len(self.file_dir)

    def __getitem__(self, idx):
        image_path = self.root_path + self.file_dir[idx]
        img_jpg = Image.open(image_path).convert('RGB') 
        to_tensor = transforms.ToTensor()
        img_tensor = to_tensor(img_jpg)
        return img_tensor,torch.tensor(self.item[self.file_dir[idx].split("/")[0]],dtype=torch.long)


class Block(torch.nn.Module):
    def __init__(self,in_channels, out_channels):
        super(Block,self).__init__()

        # bottleneck main path: 1x1 -> 3x3 (stride 2) -> 1x1, each followed by BatchNorm
        self.conv_layer = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, in_channels,1,stride=1), 
            torch.nn.BatchNorm2d(in_channels),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels, in_channels, 3,stride=2,padding=1), 
            torch.nn.BatchNorm2d(in_channels),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels ,out_channels,1, stride=1),
            torch.nn.BatchNorm2d(out_channels),
        )
        # projection shortcut: 1x1 stride-2 conv to match the main path's channels and size
        self.extra_layer = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels ,out_channels,1, stride=2),
            torch.nn.BatchNorm2d(out_channels),
        )

    def forward(self, input):
        output = self.conv_layer(input)
        output += self.extra_layer(input)
        output = torch.nn.functional.relu(output)
        return output

class Net(torch.nn.Module):
    def __init__(self):
        super(Net,self).__init__()

        self.conv_layer = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64,7,stride=2,padding=3), 
            torch.nn.BatchNorm2d(64),
            Block(64 ,128),
            Block(128 ,512),
            Block(512 ,512),
            Block(512 ,1024),
            Block(1024 ,1024),
            # Block(in_channels ,out_channels),
            # Block(in_channels ,out_channels),
            torch.nn.AdaptiveAvgPool2d((1,1)),
        )
        self.full_layer = torch.nn.Sequential(
            torch.nn.Linear(1024,6), # 1
        )

    def forward(self, input):
        output = self.conv_layer(input)
        output = output.view(input.size(0),-1)
        output = self.full_layer(output)
        return output

def printNet():
    net = Net()
    summary(net, (128, 3, 150, 150)) 



def startTrain():
    BATCH_SIZE = 128


    train_loader = DataLoader(dataset=trainData(), batch_size=BATCH_SIZE, shuffle=True,drop_last=True,num_workers=28,pin_memory=True)
    valid_loader = DataLoader(dataset=testData(), batch_size=BATCH_SIZE, shuffle=True, drop_last=True,num_workers=28,pin_memory=True)



    net = Net().cuda()

    lossFun = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adadelta(net.parameters())

    for i in range(2000):
        loss_sum = 0
        accuracy = 0
        print('-------- epoch {} --------'.format(i))

        net.train()
        for idx,(data,label) in enumerate(train_loader):
            data = data.cuda()
            label = label.cuda()

            y_pred = net(data)
            label = torch.reshape(label,(1,BATCH_SIZE))[0]

            loss = lossFun(y_pred,label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE

        valid_loss_sum = 0
        valid_accuracy = 0
        net.eval()  # use BatchNorm running statistics during validation
        with torch.no_grad():
            for idx,(data,label) in enumerate(valid_loader):
                data = data.cuda()
                label = label.cuda()

                y_pred = net(data)
                label = torch.reshape(label,(1,BATCH_SIZE))[0]
                loss = lossFun(y_pred,label)

                valid_loss_sum += loss.item()
                valid_accuracy += (y_pred.argmax(1)==label).sum()/BATCH_SIZE

        print('epoch {} -- train loss {} -- train accuracy {} -- valid loss {} -- valid accuracy {}'.format(i,loss_sum/len(train_loader),accuracy/len(train_loader),valid_loss_sum/len(valid_loader),valid_accuracy/len(valid_loader)))

if __name__ == '__main__':
    startTrain()
    # printNet()
    

Training Results

Training accuracy stabilizes at 94%, with room to climb further, but validation accuracy showed no further upward trend, so training was stopped early. Validation accuracy stabilizes at 90%, a gain of one percentage point.
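
The early stop above was done by hand; a patience-based check could automate it. A minimal sketch, where the train_one_epoch/evaluate wrappers around the loops in startTrain, the patience of 10, and the checkpoint filename are all assumptions for illustration:

import torch

def train_with_early_stopping(net, train_one_epoch, evaluate, max_epochs=2000, patience=10):
    # stop once validation accuracy has not improved for `patience` consecutive epochs
    best_acc, stale = 0.0, 0
    for epoch in range(max_epochs):
        train_one_epoch()
        acc = evaluate()  # mean validation accuracy for this epoch
        if acc > best_acc:
            best_acc, stale = acc, 0
            torch.save(net.state_dict(), 'best_resnet.pth')  # keep the best checkpoint
        else:
            stale += 1
            if stale >= patience:
                print('no improvement for {} epochs, stopping at epoch {}'.format(patience, epoch))
                break
    return best_acc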
