superpixel_projections.py
import numpy as np
import os
import constants
from numpy.linalg import inv
from dataloader import indoor_scenes
import torch
from collections import OrderedDict, Counter
from tqdm import tqdm
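

# Superpixel cross-view coverage: back-project every frame of a scene into a shared
# world-space point cloud, re-project it into each frame, and record how much of each
# target superpixel is covered by points from every (source frame, source superpixel).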
def project_image_to_world(x, y, depth, cam2world, depth_intrinsic):
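    """Back-project flattened pixel coordinates (x, y) with per-pixel depth into world space.

    Builds homogeneous image coordinates (x*d, y*d, d, 1), applies the inverse depth
    intrinsic followed by the cam2world pose, and returns a 4xN tensor of world
    coordinates on the GPU.
    """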
I = torch.zeros(4, depth.shape[0]).type(torch.cuda.FloatTensor)
I[0, :] = x * depth
I[1, :] = y * depth
I[2, :] = depth
I[3, :] = 1.0
world_coordinates = torch.mm(torch.from_numpy(cam2world).type(torch.cuda.FloatTensor), torch.mm(
torch.from_numpy(inv(depth_intrinsic)).type(torch.cuda.FloatTensor), I))
del I, x, y, depth
torch.cuda.empty_cache()
return world_coordinates


def project_images_to_world(depths, cam2worlds, depth_intrinsic, superpixels, frames):
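    """Back-project all depth frames of a scene into one shared world-space point cloud.

    Returns a 4x(num_frames*W*H) tensor of homogeneous world coordinates together with
    two parallel tensors recording, for every point, the frame index it came from and
    the superpixel id it belonged to in that frame.
    """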
x = np.linspace(0, constants.DEPTH_WIDTH - 1, constants.DEPTH_WIDTH)
y = np.linspace(0, constants.DEPTH_HEIGHT - 1, constants.DEPTH_HEIGHT)
x_mesh, y_mesh = np.meshgrid(x, y)
world_coordinates = torch.zeros(4, len(depths) * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT).type(torch.cuda.FloatTensor)
frame_origins = torch.zeros(len(depths) * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT).type(torch.cuda.IntTensor)
superpixel_origins = torch.zeros(len(depths) * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT).type(torch.cuda.IntTensor)
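    # fill one W*H block per frame: world coordinates plus frame / superpixel provenance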
for im_idx in range(len(depths)):
world_coordinates[:, im_idx * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT: (im_idx + 1) * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT] = project_image_to_world(torch.from_numpy(x_mesh).type(torch.cuda.FloatTensor).flatten(),
torch.from_numpy(y_mesh).type(torch.cuda.FloatTensor).flatten(), torch.from_numpy(depths[im_idx][:]).type(torch.cuda.FloatTensor).flatten(), cam2worlds[im_idx], depth_intrinsic)
frame_origins[im_idx * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT: (im_idx + 1) * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT] = torch.ones(
constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT).type(torch.cuda.IntTensor) * frames[im_idx]
        superpixel_origins[im_idx * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT: (im_idx + 1) * constants.DEPTH_WIDTH * constants.DEPTH_HEIGHT] = torch.from_numpy(superpixels[im_idx].astype(np.int64).flatten()).type(torch.cuda.IntTensor)
# visualize_point_cloud(world_coordinates)
return world_coordinates, frame_origins, superpixel_origins


def project_world_to_image(depth, superpixel_map, cam2world, depth_intrinsic, world_coordinates, frame_origins, superpixel_origins):
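    """Project the scene point cloud into a single target frame and measure superpixel overlap.

    Points are projected with the target frame's pose and intrinsics, then filtered for
    invalid depths, image bounds and occlusion; the survivors are counted per
    (target superpixel, source frame, source superpixel) triple. Returns a dict mapping
    each triple to the fraction of the target superpixel it covers, or None if no
    point survives the filtering.
    """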
world_coordinates_copy = world_coordinates.transpose(0, 1)[:, :3]
projected_points = torch.mm(torch.mm(torch.from_numpy(depth_intrinsic).type(torch.cuda.FloatTensor),
torch.from_numpy(inv(cam2world)).type(torch.cuda.FloatTensor)), world_coordinates)
projected_points = projected_points.transpose(0, 1)[:, :3]
projected_points[:, 0] /= projected_points[:, 2]
projected_points[:, 1] /= projected_points[:, 2]
projected_points[:, 2] /= projected_points[:, 2]
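    # points with zero projected depth yield NaN in the perspective divide; drop them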
selection_mask = ~torch.isnan(projected_points[:, 2])
projected_points = torch.round(projected_points[selection_mask])
frame_origins = frame_origins[selection_mask]
superpixel_origins = superpixel_origins[selection_mask]
world_coordinates_copy = world_coordinates_copy[selection_mask]
# remove out of frame bounds
selection_mask = (projected_points[:, 0] >= 0) & (projected_points[:, 0] < constants.DEPTH_WIDTH) & (
projected_points[:, 1] >= 0) & (projected_points[:, 1] < constants.DEPTH_HEIGHT)
projected_points = projected_points[selection_mask][:, :2]
frame_origins = frame_origins[selection_mask]
superpixel_origins = superpixel_origins[selection_mask]
world_coordinates_copy = world_coordinates_copy[selection_mask]
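    # occlusion check: back-project the target frame's own depth at every hit pixel and
    # keep a point only if it lands close to its original world coordinate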
depth = torch.from_numpy(depth).type(torch.cuda.FloatTensor)
depth = depth[projected_points[:, 1].type(torch.cuda.LongTensor), projected_points[:, 0].type(torch.cuda.LongTensor)].flatten()
backprojected_points = project_image_to_world(projected_points[:, 0], projected_points[
:, 1], depth, cam2world, depth_intrinsic).transpose(0, 1)[:, :3]
selection_mask = (torch.norm(world_coordinates_copy - backprojected_points, dim=1) < constants.WORLD_DISTANCE_THRESHOLD)
projected_points = projected_points[selection_mask]
if projected_points.shape[0] == 0:
return None
frame_origins = frame_origins[selection_mask]
superpixel_origins = superpixel_origins[selection_mask]
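    # superpixel id of the target frame at each surviving projected pixel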
superpixel_targets = superpixel_map[projected_points[:, 1].type(torch.cuda.LongTensor).cpu().numpy(), projected_points[:, 0].type(torch.cuda.LongTensor).cpu().numpy()].flatten()
t1, t2 = np.unique(superpixel_map, return_counts=True)
target_superpixel_sizes = dict(zip(t1, t2))
frame_spx = torch.zeros((frame_origins.shape[0], 3)).type(torch.cuda.IntTensor)
    frame_spx[:, 0] = torch.from_numpy(superpixel_targets.astype(np.int64)).type(torch.cuda.IntTensor)
frame_spx[:, 1] = frame_origins
frame_spx[:, 2] = superpixel_origins
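    # count projected points per (target superpixel, source frame, source superpixel) triple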
uniques, counts = torch.unique(frame_spx, dim=0, return_counts=True)
frame_spx_counts = {}
for idx, u in enumerate(uniques.tolist()):
frame_spx_counts[tuple(u)] = float(counts[idx].cpu().item())
coverage_dict = {}
for i in frame_spx_counts:
coverage = frame_spx_counts[i] / target_superpixel_sizes[i[0]]
coverage_dict[(i[0], i[1], i[2])] = coverage
return coverage_dict # , projected_points


def find_superpixel_coverage(dataset_name, lmdb_handle, superpixel_dir, base_size, images):
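    """Compute and save per-scene superpixel coverage dictionaries.

    For every scene, all frames are back-projected into a shared point cloud, the
    coverage of each frame's superpixels by every (source frame, source superpixel)
    is computed, and the result is saved as
    <SSD_DATASET_ROOT>/<dataset>/raw/selections/coverage_<superpixel_dir>/<scene_id>.npy,
    followed by a coverage_paths.txt listing all processed image paths.
    """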
dataset = indoor_scenes.IndoorScenesWithAllInfo(dataset_name, lmdb_handle, superpixel_dir, base_size, images)
scene_id_to_index = dataset.scene_id_to_index
image_paths = []
for scene_id in tqdm(scene_id_to_index, desc='Scene[Coverage]'):
all_frame_coverages = OrderedDict()
depths = []
poses = []
superpixels = []
intrinsic = None
for frame_id in scene_id_to_index[scene_id]:
sample = dataset[frame_id]
depths.append(sample['depth'])
poses.append(sample['pose'])
superpixels.append(sample['superpixel'])
intrinsic = sample['intrinsic']
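        # back-project every frame of this scene into a single world-space point cloud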
world_coordinates, frame_origins, superpixel_origins = project_images_to_world(depths, poses, intrinsic, superpixels, scene_id_to_index[scene_id])
for frame_id in tqdm(scene_id_to_index[scene_id], desc='Scene[Project]'):
sample = dataset[frame_id]
frame_coverages = project_world_to_image(sample['depth'], sample['superpixel'], sample['pose'], sample['intrinsic'], world_coordinates, frame_origins, superpixel_origins)
            if frame_coverages is not None:
all_frame_coverages[frame_id] = frame_coverages
image_paths.append(images[frame_id])
#from pprint import pprint
#pprint(all_frame_coverages)
np.save(os.path.join(constants.SSD_DATASET_ROOT, dataset_name, "raw", "selections", "coverage_"+superpixel_dir, f'{scene_id}.npy'), all_frame_coverages)
del world_coordinates, frame_origins, superpixel_origins
del depths, poses, superpixels, all_frame_coverages
torch.cuda.empty_cache()
with open(os.path.join(constants.SSD_DATASET_ROOT, dataset_name, "raw", "selections", "coverage_"+superpixel_dir, "coverage_paths.txt"), "w") as fptr:
for p in image_paths:
fptr.write(p.decode() + "\n")


def test_coverage_scannet_sample():
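    """Smoke test: run the coverage computation on the scannet-sample dataset."""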
import constants
import os
from dataloader import dataset_base
from dataloader.indoor_scenes import IndoorScenes
lmdb_handle = dataset_base.LMDBHandle(os.path.join(constants.HDD_DATASET_ROOT, "scannet-sample", "dataset.lmdb"), False)
train_set = IndoorScenes('scannet-sample', lmdb_handle, (240, 320), 'train')
    # NOTE: find_superpixel_coverage also expects the superpixel directory name;
    # 'superpixel' below is a placeholder for the directory of precomputed superpixel maps.
    find_superpixel_coverage('scannet-sample', lmdb_handle, 'superpixel', (240, 320), train_set.image_path_subset)


if __name__ == '__main__':
test_coverage_scannet_sample()