Hi all,
I’m new to PL, but comfortable in Pytorch, and Ive been trying to build a super simple classifier model using PL, to learn the ropes.
My model runs fine on CPU but when I add accelerator="gpu", devices=1
to pl.Trainer()
I get the following frustrating Runtime error:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper__index_select)
The data set is a simple pandas dataframe, loaded from csv, with a bunch of continuous data fields and a categorical field needed for embedding
Heres my code…
# Dataset...
class RaspDataset(Dataset):
def __init__(self, df):
self.X = df.values[:,1:-2]
self.takeoff = df.values[:,-2]
self.Y = df.values[:, -1]
def __len__(self):
return self.X.shape[0]
def __getitem__(self, idx):
y = self.Y[idx]
return self.X[idx], self.takeoff[idx], y
Heres my NN model…
class RaspModel(pl.LightningModule):
def __init__(self):
super().__init__()
self.takeoff_embedder = nn.Embedding(df.takeoff.nunique()+1, 5)
self.l1 = nn.Linear(35, 10)
self.relu1 = nn.ReLU()
self.l2 = nn.Linear(10,1)
self.sig1 = nn.Sigmoid()
def forward(self, x, takeoff):
takeoff = takeoff.type(LongTensor)
takeoff = self.takeoff_embedder(takeoff)
x = cat((takeoff, x),1)
x = x.type(FloatTensor)
x = self.l1(x)
x = self.relu1(x)
x = self.l2(x)
return self.sig1(x)
And heres my PL code…
class LitModel(pl.LightningModule):
def __init__(self, rasp_model, data, weight):
super().__init__()
self.rasp_model = rasp_model
self.loss_fn = nn.BCELoss(reduce=False)
self.data = data
self.weight = from_numpy(weight)
def training_step(self, batch, batch_idx):
x, takeoff, y = batch
weight_ = self.weight[y.data.view(-1).long()].view_as(y)
y = y.view(-1, 1)
y = y.type(FloatTensor)
x_hat = self.rasp_model(x, takeoff)
loss = self.loss_fn(x_hat, y)
loss = loss * weight_
loss = loss.mean()
return {'loss':loss}
def test_step(self, batch, batch_idx):
x, takeoff, y = batch
weight_ = self.weight[y.data.view(-1).long()].view_as(y)
y = y.view(-1, 1)
y = y.type(FloatTensor)
x_hat = self.rasp_model(x, takeoff)
test_loss = self.loss_fn(x_hat, y)
test_loss = test_loss * weight_
test_loss = test_loss.mean()
return {'loss':test_loss}
def predict_step(self, batch, batch_idx, dataloader_idx=0):
x, takeoff, y = batch
pred = self.rasp_model(x, takeoff)
return pred
def configure_optimizers(self):
optimizer = Adam(self.parameters(), lr=1e-3)
return optimizer
def train_dataloader(self):
rasp_train_dataset = RaspDataset(self.data)
#rasp_test_dataset = RaspDataset(test_df)
rasp_train_loader = DataLoader(rasp_train_dataset, batch_size =10, shuffle=True)
#rasp_test_loader = DataLoader(rasp_test_dataset, shuffle=False)
return rasp_train_loader
Im sure the solution is really simple, I just cant see it!
Thanks in advance