diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/DouOne.iml b/.idea/DouOne.iml
new file mode 100644
index 0000000..8b8c395
--- /dev/null
+++ b/.idea/DouOne.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..a739919
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py
index 4343ddf..53e017c 100644
--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@@ -80,9 +80,112 @@ def forward(self, z, x, return_value=False, flags=None):
 
+# ResNet-style value network, adapted from
+# https://github.com/Vincentzyx/Douzero_Resnet/blob/main/douzero/dmc/models.py
+# Defined ahead of model_dict because the registry below references
+# GeneralModel at import time.
+# NOTE(review): relies on nn, F (torch.nn.functional) and np being imported
+# at the top of this module; this patch adds no import, so confirm F exists.
+
+
+class BasicBlock(nn.Module):
+    """1-D residual block in the ResNet-18/34 style (two kernel-3 convs).
+
+    If the stride or the channel count changes, the shortcut branch applies
+    a kernel-1 convolution plus batch norm so both branches can be added.
+    """
+
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=(3,),
+                               stride=(stride,), padding=1, bias=False)
+        self.bn1 = nn.BatchNorm1d(planes)
+        self.conv2 = nn.Conv1d(planes, planes, kernel_size=(3,),
+                               stride=(1,), padding=1, bias=False)
+        self.bn2 = nn.BatchNorm1d(planes)
+        # Identity shortcut unless the main branch changes the tensor shape.
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion * planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv1d(in_planes, self.expansion * planes,
+                          kernel_size=(1,), stride=(stride,), bias=False),
+                nn.BatchNorm1d(self.expansion * planes),
+            )
+
+    def forward(self, x):
+        """Return relu(main_branch(x) + shortcut(x))."""
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        return F.relu(out)
+
+
+class GeneralModel(nn.Module):
+    """Value network registered for all three positions: ResNet trunk + MLP.
+
+    Shapes, as fixed by the layer sizes below: z is (N, 40, L) where L must
+    shrink to 4 after the four stride-2 stages (e.g. L = 54), and x is
+    (N, 15).
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.in_planes = 80
+        # Shape comments are (channels, length) for a length-54 input.
+        self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
+                               stride=(2,), padding=1, bias=False)  # (80, 27)
+        self.bn1 = nn.BatchNorm1d(80)
+        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)  # (80, 14)
+        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)  # (160, 7)
+        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)  # (320, 4)
+        # Head input: 320 * 4 flattened trunk features + four copies of x.
+        self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
+        self.linear2 = nn.Linear(1024, 512)
+        self.linear3 = nn.Linear(512, 256)
+        self.linear4 = nn.Linear(256, 1)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Stack num_blocks blocks; only the first one uses `stride`."""
+        strides = [stride] + [1] * (num_blocks - 1)
+        layers = []
+        for block_stride in strides:
+            layers.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def forward(self, z, x, return_value=False, flags=None, debug=False):
+        """Score every row of (z, x) and optionally pick one.
+
+        Returns dict(values=out) with out of shape (N, 1) when return_value
+        is true. Otherwise returns dict(action=row_index, max_value=best),
+        where the row is random with probability flags.exp_epsilon and the
+        argmax otherwise. `debug` is accepted but unused.
+        """
+        out = F.relu(self.bn1(self.conv1(z)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = out.flatten(1, 2)
+        out = torch.cat([x, x, x, x, out], dim=-1)
+        out = F.leaky_relu_(self.linear1(out))
+        out = F.leaky_relu_(self.linear2(out))
+        out = F.leaky_relu_(self.linear3(out))
+        out = F.leaky_relu_(self.linear4(out))
+        if return_value:
+            return dict(values=out)
+        if (flags is not None and flags.exp_epsilon > 0
+                and np.random.rand() < flags.exp_epsilon):
+            action = torch.randint(out.shape[0], (1,))[0]
+        else:
+            action = torch.argmax(out, dim=0)[0]
+        return dict(action=action, max_value=torch.max(out))
+
+
 # Model dict is only used in evaluation but not training
 model_dict = {}
-model_dict['landlord'] = LandlordLstmModel
-model_dict['landlord_up'] = FarmerLstmModel
-model_dict['landlord_down'] = FarmerLstmModel
+model_dict['landlord'] = GeneralModel
+model_dict['landlord_up'] = GeneralModel
+model_dict['landlord_down'] = GeneralModel
 
 class Model:
     """
@@ -93,9 +196,9 @@ def __init__(self, device=0):
         self.models = {}
         if not device == "cpu":
             device = 'cuda:' + str(device)
-        self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
-        self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
-        self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
+        self.models['landlord'] = GeneralModel().to(torch.device(device))
+        self.models['landlord_up'] = GeneralModel().to(torch.device(device))
+        self.models['landlord_down'] = GeneralModel().to(torch.device(device))
 
     def forward(self, position, z, x, training=False, flags=None):
         model = self.models[position]