NVIDIA · pushkalkatara · Feb 20, 2020 · Feb 20, 2020 · Feb 20, 2020
diff --git a/Dockerfile b/Dockerfile
@@ -21,7 +21,7 @@ ENV LIBRARY_PATH /usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/cuda/l
 
 # python3 modules
 RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && \
-        pip3 install --upgrade --no-cache-dir wheel six setuptools cython numpy scipy==1.2.0 \
+        pip3 install --upgrade --no-cache-dir wheel six setuptools cython numpy imageio \
                 matplotlib seaborn scikit-learn scikit-image pillow requests \
                 jupyterlab networkx h5py pandas plotly protobuf tqdm tensorboardX colorama setproctitle && \
         pip3 install https://download.pytorch.org/whl/cu90/torch-1.0.0-cp35-cp35m-linux_x86_64.whl
diff --git a/README.md b/README.md
@@ -48,7 +48,7 @@ L1 and L2 losses with multi-scale support are available in [losses.py](./losses.
 Currently, the code supports python 3
 * numpy 
 * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4))
-* scipy 
+* imageio
 * scikit-image
 * tensorboardX
 * colorama, tqdm, setproctitle 

diff --git a/datasets.py b/datasets.py
@@ -8,7 +8,7 @@
 from glob import glob
 import utils.frame_utils as frame_utils
 
-from scipy.misc import imread, imresize
+from imageio import imread
 
 class StaticRandomCrop(object):
     def __init__(self, image_size, crop_size):

diff --git a/run_a_pair.py b/run_a_pair.py
@@ -2,44 +2,29 @@
 import numpy as np
 import argparse
 
-from Networks.FlowNet2 import FlowNet2  # the path is depended on where you create this module
-from frame_utils import read_gen  # the path is depended on where you create this module
+from models import FlowNet2
+from utils.frame_utils import read_gen
 
-if __name__ == '__main__':
-    # obtain the necessary args for construct the flownet framework
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
-    parser.add_argument("--rgb_max", type=float, default=255.)
-
-    args = parser.parse_args()
+class Args():  # minimal settings object handed to FlowNet2 in place of parsed CLI arguments
+    fp16 = False  # pseudo-fp16 mode (fp16 storage, fp32 math) stays off
+    rgb_max = 255.  # same value the former --rgb_max command-line option defaulted to
 
+def get_flow(img1, img2, weights):  # img1/img2 are passed to read_gen, weights to torch.load; returns the flow as a numpy array
     # initial a Net
+    args = Args()  # NOTE(review): the model is constructed and its checkpoint reloaded on every call
     net = FlowNet2(args).cuda()
     # load the state_dict
-    dict = torch.load("/home/hjj/PycharmProjects/flownet2_pytorch/FlowNet2_checkpoint.pth.tar")
+    dict = torch.load(weights)  # NOTE(review): the local name `dict` shadows the builtin inside this function
     net.load_state_dict(dict["state_dict"])
 
     # load the image pair, you can find this operation in dataset.py
-    pim1 = read_gen("/home/hjj/flownet2-master/data/FlyingChairs_examples/0000007-img0.ppm")
-    pim2 = read_gen("/home/hjj/flownet2-master/data/FlyingChairs_examples/0000007-img1.ppm")
+    pim1 = read_gen(img1)
+    pim2 = read_gen(img2)  # assumes both images have the same shape so they stack into one 4-D array - TODO confirm
     images = [pim1, pim2]
     images = np.array(images).transpose(3, 0, 1, 2)
     im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda()
 
     # process the image pair to obtian the flow
     result = net(im).squeeze()
-
-
-    # save flow, I reference the code in scripts/run-flownet.py in flownet2-caffe project
-    def writeFlow(name, flow):
-        f = open(name, 'wb')
-        f.write('PIEH'.encode('utf-8'))
-        np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f)
-        flow = flow.astype(np.float32)
-        flow.tofile(f)
-        f.flush()
-        f.close()
-
-
     data = result.data.cpu().numpy().transpose(1, 2, 0)
-    writeFlow("/home/hjj/flownet2-master/data/FlyingChairs_examples/0000007-img.flo", data)
+    return data  # channel-last array: the transpose above moves axis 0 to the end
diff --git a/utils/frame_utils.py b/utils/frame_utils.py
@@ -1,6 +1,6 @@
 import numpy as np
 from os.path import *
-from scipy.misc import imread
+from imageio import imread
 from . import flow_utils 
 
 def read_gen(file_name):
@@ -16,3 +16,134 @@ def read_gen(file_name):
     elif ext == '.flo':
         return flow_utils.readFlow(file_name).astype(np.float32)
     return []
+
+UNKNOWN_FLOW_THRESH = 1e7
+def flow_to_image(flow):
+    """
+    Convert flow into middlebury color code image
+    :param flow: optical flow map
+    :return: optical flow image in middlebury color
+    """
+    u = flow[:, :, 0]
+    v = flow[:, :, 1]
+
+    maxu = -999.
+    maxv = -999.
+    minu = 999.
+    minv = 999.
+
+    idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
+    u[idxUnknow] = 0
+    v[idxUnknow] = 0
+
+    maxu = max(maxu, np.max(u))
+    minu = min(minu, np.min(u))
+
+    maxv = max(maxv, np.max(v))
+    minv = min(minv, np.min(v))
+
+    rad = np.sqrt(u ** 2 + v ** 2)
+    maxrad = max(-1, np.max(rad))
+
+    u = u/(maxrad + np.finfo(float).eps)
+    v = v/(maxrad + np.finfo(float).eps)
+
+    img = compute_color(u, v)
+
+    idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
+    img[idx] = 0
+
+    return np.uint8(img)
+
+def compute_color(u, v):
+    """
+    compute optical flow color map
+    :param u: optical flow horizontal map
+    :param v: optical flow vertical map
+    :return: optical flow in color code
+    """
+    [h, w] = u.shape
+    img = np.zeros([h, w, 3])
+    nanIdx = np.isnan(u) | np.isnan(v)
+    u[nanIdx] = 0
+    v[nanIdx] = 0
+
+    colorwheel = make_color_wheel()
+    ncols = np.size(colorwheel, 0)
+
+    rad = np.sqrt(u**2+v**2)
+
+    a = np.arctan2(-v, -u) / np.pi
+
+    fk = (a+1) / 2 * (ncols - 1) + 1
+
+    k0 = np.floor(fk).astype(int)
+
+    k1 = k0 + 1
+    k1[k1 == ncols+1] = 1
+    f = fk - k0
+
+    for i in range(0, np.size(colorwheel,1)):
+        tmp = colorwheel[:, i]
+        col0 = tmp[k0-1] / 255
+        col1 = tmp[k1-1] / 255
+        col = (1-f) * col0 + f * col1
+
+        idx = rad <= 1
+        col[idx] = 1-rad[idx]*(1-col[idx])
+        notidx = np.logical_not(idx)
+
+        col[notidx] *= 0.75
+        img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))
+
+    return img
+
+
+def make_color_wheel():
+    """
+    Generate color wheel according Middlebury color code
+    :return: Color wheel
+    """
+    RY = 15
+    YG = 6
+    GC = 4
+    CB = 11
+    BM = 13
+    MR = 6
+
+    ncols = RY + YG + GC + CB + BM + MR
+
+    colorwheel = np.zeros([ncols, 3])
+
+    col = 0
+
+    # RY
+    colorwheel[0:RY, 0] = 255
+    colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))
+    col += RY
+
+    # YG
+    colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))
+    colorwheel[col:col+YG, 1] = 255
+    col += YG
+
+    # GC
+    colorwheel[col:col+GC, 1] = 255
+    colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))
+    col += GC
+
+    # CB
+    colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))
+    colorwheel[col:col+CB, 2] = 255
+    col += CB
+
+    # BM
+    colorwheel[col:col+BM, 2] = 255
+    colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))
+    col += + BM
+
+    # MR
+    colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
+    colorwheel[col:col+MR, 0] = 255
+
+    return colorwheel