From 75817ef205a3dfc8bce461fa1e015106a38d10be Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Wed, 24 May 2017 09:57:32 +1000
Subject: [PATCH] Use networkx for digraph

This switches the code to use networkx for the digraph implementation.

Note that the old implementation specifically isn't removed in this
change -- for review clarity.  It will be replaced by a base class
that defines things properly to the API described below.

Plugins return a node object with three functions

 get_name() : return the unique name of this node

 get_nodes() : return a list of nodes for insertion into the graph.
  Usually this is just "self".  Some special things like partitioning
  add extra nodes at this point, however.

 get_edges() : return a tuple of two lists; edges_from and edges_to
  As you would expect the first is a list of node names that points to
  us, and the second is a list of node names we point to.  Usually
  this is only populated as ([self.base],[]) -- i.e. our "base" node
  points to us.  Some plugins, such as mounting, create links both to
  and from themselves, however.

Plugins have been updated, some test cases added (error cases
specifically)

Change-Id: Ic5a61365ef0132476b11bdbf1dd96885e91c3cb6
---
 diskimage_builder/block_device/blockdevice.py | 89 ++++++++++++++-----
 .../block_device/level0/localloop.py          | 10 +--
 .../block_device/level1/partition.py          | 16 ++--
 .../block_device/level1/partitioning.py       |  7 +-
 diskimage_builder/block_device/level2/mkfs.py | 20 ++---
 .../block_device/level3/mount.py              | 28 +++---
 .../block_device/level4/fstab.py              | 14 ++-
 .../tests/config/bad_edge_graph.yaml          | 28 ++++++
 .../tests/config/duplicate_name.yaml          | 28 ++++++
 .../tests/config/multi_key_node.yaml          |  8 ++
 .../tests/config/simple_graph.yaml            |  1 +
 .../tests/config/simple_tree.yaml             |  1 +
 .../block_device/tests/test_config.py         | 27 ++++++
 requirements.txt                              |  1 +
 14 files changed, 202 insertions(+), 76 deletions(-)
 create mode 100644 diskimage_builder/block_device/tests/config/bad_edge_graph.yaml
 create mode 100644 diskimage_builder/block_device/tests/config/duplicate_name.yaml
 create mode 100644 diskimage_builder/block_device/tests/config/multi_key_node.yaml

diff --git a/diskimage_builder/block_device/blockdevice.py b/diskimage_builder/block_device/blockdevice.py
index 38da314cb..8d2cd5148 100644
--- a/diskimage_builder/block_device/blockdevice.py
+++ b/diskimage_builder/block_device/blockdevice.py
@@ -20,6 +20,8 @@ import shutil
 import sys
 import yaml
 
+import networkx as nx
+
 from stevedore import extension
 
 from diskimage_builder.block_device.config import \
@@ -27,7 +29,6 @@ from diskimage_builder.block_device.config import \
 from diskimage_builder.block_device.exception import \
     BlockDeviceSetupException
 from diskimage_builder.block_device.utils import exec_sudo
-from diskimage_builder.graph.digraph import Digraph
 
 
 logger = logging.getLogger(__name__)
@@ -166,40 +167,86 @@ class BlockDevice(object):
             json.dump(state, fd)
 
     def create_graph(self, config, default_config):
-        logger.debug("Create graph [%s]" % config)
+        """Generate configuration digraph
+
+        Generate the configuration digraph from the config
+
+        :param config: graph configuration file
+        :param default_config: default parameters (from --params)
+        :return: tuple with the graph object, nodes in call order
+        """
         # This is the directed graph of nodes: each parse method must
         # add the appropriate nodes and edges.
-        dg = Digraph()
+        dg = nx.DiGraph()
 
         for config_entry in config:
-            if len(config_entry) != 1:
-                logger.error("Invalid config entry: more than one key "
-                             "on top level [%s]" % config_entry)
-                raise BlockDeviceSetupException(
-                    "Top level config must contain exactly one key per entry")
+            # this should have been checked by generate_config
+            assert len(config_entry) == 1
+
             logger.debug("Config entry [%s]" % config_entry)
             cfg_obj_name = list(config_entry.keys())[0]
             cfg_obj_val = config_entry[cfg_obj_name]
 
-            # As the first step the configured objects are created
-            # (if it exists)
+            # Instantiate a "plugin" object, passing it the
+            # configuration entry
             if cfg_obj_name not in self.plugin_manager:
-                logger.error("Configured top level element [%s] "
-                             "does not exists." % cfg_obj_name)
-                return 1
+                raise BlockDeviceSetupException(
+                    ("Config element [%s] is not implemented" % cfg_obj_name))
             cfg_obj = self.plugin_manager[cfg_obj_name].plugin(
                 cfg_obj_val, default_config)
 
-            # At this point it is only possible to add the nodes:
-            # adding the edges needs all nodes first.
-            cfg_obj.insert_nodes(dg)
+            # Ask the plugin for the nodes it would like to insert
+            # into the graph.  Some plugins, such as partitioning,
+            # return multiple nodes from one config entry.
+            nodes = cfg_obj.get_nodes()
+            for node in nodes:
+                # would only be missing if a plugin was way out of
+                # line and didn't put it in...
+                assert node.name
+                # ensure node names are unique.  networkx by default
+                # just appends the attribute to the node dict for
+                # existing nodes, which is not what we want.
+                if node.name in dg.node:
+                    raise BlockDeviceSetupException(
+                        "Duplicate node name: %s" % (node.name))
+                logger.debug("Adding %s : %s", node.name, node)
+                dg.add_node(node.name, obj=node)
 
-        # Now that all the nodes exists: add also the edges
-        for node in dg.get_iter_nodes_values():
-            node.insert_edges(dg)
+        # Now find edges
+        for name, attr in dg.nodes(data=True):
+            obj = attr['obj']
+            # Unfortunately, we can not determine node edges just from
+            # the configuration file.  It's not always simply the
+            # "base:" pointer.  So ask nodes for a list of nodes they
+            # want to point to.  *mostly* it's just base: ... but
+            # mounting is different.
+            #  edges_from are the nodes that point to us
+            #  edges_to are the nodes we point to
+            edges_from, edges_to = obj.get_edges()
+            logger.debug("Edges for %s: f:%s t:%s", name,
+                         edges_from, edges_to)
+            for edge_from in edges_from:
+                if edge_from not in dg.node:
+                    raise BlockDeviceSetupException(
+                        "Edge not defined: %s->%s" % (edge_from, name))
+                dg.add_edge(edge_from, name)
+            for edge_to in edges_to:
+                if edge_to not in dg.node:
+                    raise BlockDeviceSetupException(
+                        "Edge not defined: %s->%s" % (name, edge_to))
+                dg.add_edge(name, edge_to)
+
+        # this can be quite helpful debugging but needs pydotplus.
+        # run "dotty /tmp/out.dot"
+        #  XXX: maybe an env var that dumps to a tmpdir or something?
+        # nx.nx_pydot.write_dot(dg, '/tmp/graph_dump.dot')
+
+        # Topological sort (i.e. create a linear array that satisfies
+        # dependencies) and return the object list
+        call_order_nodes = nx.topological_sort(dg)
+        logger.debug("Call order: %s", list(call_order_nodes))
+        call_order = [dg.node[n]['obj'] for n in call_order_nodes]
 
-        call_order = dg.topological_sort()
-        logger.debug("Call order [%s]" % (list(call_order)))
         return dg, call_order
 
     def create(self, result, rollback):
diff --git a/diskimage_builder/block_device/level0/localloop.py b/diskimage_builder/block_device/level0/localloop.py
index aa33531dd..2fe6c12c5 100644
--- a/diskimage_builder/block_device/level0/localloop.py
+++ b/diskimage_builder/block_device/level0/localloop.py
@@ -48,13 +48,13 @@ class LocalLoop(Digraph.Node):
         Digraph.Node.__init__(self, self.name)
         self.filename = os.path.join(self.image_dir, self.name + ".raw")
 
-    def insert_edges(self, dg):
+    def get_edges(self):
         """Because this is created without base, there are no edges."""
-        pass
+        return ([], [])
 
-    def insert_nodes(self, dg):
-        """Adds self as a node to the given digraph"""
-        dg.add_node(self)
+    def get_nodes(self):
+        """Returns nodes for adding to the graph"""
+        return [self]
 
     @staticmethod
     def image_create(filename, size):
diff --git a/diskimage_builder/block_device/level1/partition.py b/diskimage_builder/block_device/level1/partition.py
index 7a61176c8..50c96047e 100644
--- a/diskimage_builder/block_device/level1/partition.py
+++ b/diskimage_builder/block_device/level1/partition.py
@@ -55,11 +55,6 @@ class Partition(Digraph.Node):
 
         self.ptype = int(config['type'], 16) if 'type' in config else 0x83
 
-    def __repr__(self):
-        return "<Partition [%s] on [%s] size [%s] prev [%s]>" \
-            % (self.name, self.base, self.size,
-               self.prev_partition.name if self.prev_partition else "UNSET")
-
     def get_flags(self):
         return self.flags
 
@@ -72,13 +67,12 @@ class Partition(Digraph.Node):
     def get_name(self):
         return self.name
 
-    def insert_edges(self, dg):
-        bnode = dg.find(self.base)
-        assert bnode is not None
-        dg.create_edge(bnode, self)
+    def get_edges(self):
+        edge_from = [self.base]
+        edge_to = []
         if self.prev_partition is not None:
-            logger.debug("Insert edge [%s]" % self)
-            dg.create_edge(self.prev_partition, self)
+            edge_from.append(self.prev_partition.name)
+        return (edge_from, edge_to)
 
     def create(self, result, rollback):
         self.partitioning.create(result, rollback)
diff --git a/diskimage_builder/block_device/level1/partitioning.py b/diskimage_builder/block_device/level1/partitioning.py
index e11ae962e..7b04344b6 100644
--- a/diskimage_builder/block_device/level1/partitioning.py
+++ b/diskimage_builder/block_device/level1/partitioning.py
@@ -84,10 +84,9 @@ class Partitioning(Digraph.Node):
             fd.seek(0, 2)
             return fd.tell()
 
-    def insert_nodes(self, dg):
-        for part in self.partitions:
-            logger.debug("Insert node [%s]" % part)
-            dg.add_node(part)
+    def get_nodes(self):
+        # We just add partitions
+        return self.partitions
 
     def _all_part_devices_exist(self, expected_part_devices):
         for part_device in expected_part_devices:
diff --git a/diskimage_builder/block_device/level2/mkfs.py b/diskimage_builder/block_device/level2/mkfs.py
index 9d891597e..71aef546f 100644
--- a/diskimage_builder/block_device/level2/mkfs.py
+++ b/diskimage_builder/block_device/level2/mkfs.py
@@ -96,15 +96,10 @@ class Filesystem(Digraph.Node):
 
         logger.debug("Filesystem created [%s]" % self)
 
-    def __repr__(self):
-        return "<Filesystem base [%s] name [%s] type [%s]>" \
-            % (self.base, self.name, self.type)
-
-    def insert_edges(self, dg):
-        logger.debug("Insert edge [%s]" % self)
-        bnode = dg.find(self.base)
-        assert bnode is not None
-        dg.create_edge(bnode, self)
+    def get_edges(self):
+        edge_from = [self.base]
+        edge_to = []
+        return (edge_from, edge_to)
 
     def create(self, result, rollback):
         logger.info("create called; result [%s]" % result)
@@ -174,7 +169,8 @@ class Mkfs(object):
         fs = Filesystem(self.config)
         self.filesystems[fs.get_name()] = fs
 
-    def insert_nodes(self, dg):
+    def get_nodes(self):
+        nodes = []
         for _, fs in self.filesystems.items():
-            logger.debug("Insert node [%s]" % fs)
-            dg.add_node(fs)
+            nodes.append(fs)
+        return nodes
diff --git a/diskimage_builder/block_device/level3/mount.py b/diskimage_builder/block_device/level3/mount.py
index b709f4513..f8ef83145 100644
--- a/diskimage_builder/block_device/level3/mount.py
+++ b/diskimage_builder/block_device/level3/mount.py
@@ -45,11 +45,7 @@ class MountPoint(Digraph.Node):
         Digraph.Node.__init__(self, self.name)
         logger.debug("MountPoint created [%s]" % self)
 
-    def __repr__(self):
-        return "<MountPoint base [%s] name [%s] mount_point [%s]>" \
-            % (self.base, self.name, self.mount_point)
-
-    def insert_node(self, dg):
+    def get_node(self):
         global mount_points
         if self.mount_point in mount_points:
             raise BlockDeviceSetupException(
@@ -57,9 +53,9 @@ class MountPoint(Digraph.Node):
                 % self.mount_point)
         logger.debug("Insert node [%s]" % self)
         mount_points[self.mount_point] = self
-        dg.add_node(self)
+        return self
 
-    def insert_edges(self, dg):
+    def get_edges(self):
         """Insert all edges
 
         After inserting all the nodes, the order of the mounting and
@@ -74,7 +70,8 @@ class MountPoint(Digraph.Node):
         ensures that during mounting (and umounting) the correct
         order is used.
         """
-        logger.debug("Insert edge [%s]" % self)
+        edge_from = []
+        edge_to = []
         global mount_points
         global sorted_mount_points
         if sorted_mount_points is None:
@@ -86,11 +83,11 @@ class MountPoint(Digraph.Node):
         mpi = sorted_mount_points.index(self.mount_point)
         if mpi > 0:
             # If not the first: add also the dependency
-            dg.create_edge(mount_points[sorted_mount_points[mpi - 1]], self)
+            dep = mount_points[sorted_mount_points[mpi - 1]]
+            edge_from.append(dep.name)
 
-        bnode = dg.find(self.base)
-        assert bnode is not None
-        dg.create_edge(bnode, self)
+        edge_from.append(self.base)
+        return (edge_from, edge_to)
 
     def create(self, result, rollback):
         logger.debug("mount called [%s]" % self.mount_point)
@@ -142,12 +139,13 @@ class Mount(object):
         self.mount_base = self.params['mount-base']
 
         self.mount_points = {}
-
         mp = MountPoint(self.mount_base, self.config)
         self.mount_points[mp.get_name()] = mp
 
-    def insert_nodes(self, dg):
+    def get_nodes(self):
         global sorted_mount_points
         assert sorted_mount_points is None
+        nodes = []
         for _, mp in self.mount_points.items():
-            mp.insert_node(dg)
+            nodes.append(mp.get_node())
+        return nodes
diff --git a/diskimage_builder/block_device/level4/fstab.py b/diskimage_builder/block_device/level4/fstab.py
index cf4508842..b0883984d 100644
--- a/diskimage_builder/block_device/level4/fstab.py
+++ b/diskimage_builder/block_device/level4/fstab.py
@@ -36,15 +36,13 @@ class Fstab(Digraph.Node):
         self.dump_freq = self.config.get('dump-freq', 0)
         self.fsck_passno = self.config.get('fsck-passno', 2)
 
-    def insert_nodes(self, dg):
-        logger.debug("Insert node")
-        dg.add_node(self)
+    def get_nodes(self):
+        return [self]
 
-    def insert_edges(self, dg):
-        logger.debug("Insert edge [%s]" % self)
-        bnode = dg.find(self.base)
-        assert bnode is not None
-        dg.create_edge(bnode, self)
+    def get_edges(self):
+        edge_from = [self.base]
+        edge_to = []
+        return (edge_from, edge_to)
 
     def create(self, result, rollback):
         logger.debug("fstab create called [%s]" % self.name)
diff --git a/diskimage_builder/block_device/tests/config/bad_edge_graph.yaml b/diskimage_builder/block_device/tests/config/bad_edge_graph.yaml
new file mode 100644
index 000000000..2ffef0142
--- /dev/null
+++ b/diskimage_builder/block_device/tests/config/bad_edge_graph.yaml
@@ -0,0 +1,28 @@
+- local_loop:
+    name: image0
+
+- partitioning:
+    base: image0
+    name: mbr
+    label: mbr
+    partitions:
+      - flags: [boot, primary]
+        name: root
+        base: image0
+        size: 100%
+
+- mount:
+    base: mkfs_root
+    name: mount_mkfs_root
+    mount_point: /
+
+- fstab:
+    base: mount_mkfs_root
+    name: fstab_mount_mkfs_root
+    fsck-passno: 1
+    options: defaults
+
+- mkfs:
+    base: this_is_not_a_node
+    name: mkfs_root
+    type: ext4
diff --git a/diskimage_builder/block_device/tests/config/duplicate_name.yaml b/diskimage_builder/block_device/tests/config/duplicate_name.yaml
new file mode 100644
index 000000000..d6ce411aa
--- /dev/null
+++ b/diskimage_builder/block_device/tests/config/duplicate_name.yaml
@@ -0,0 +1,28 @@
+- local_loop:
+    name: this_is_a_duplicate
+
+- partitioning:
+    base: this_is_a_duplicate
+    name: root
+    label: mbr
+    partitions:
+      - flags: [boot, primary]
+        name: root
+        base: image0
+        size: 100%
+
+- mount:
+    base: mkfs_root
+    name: this_is_a_duplicate
+    mount_point: /
+
+- fstab:
+    base: mount_mkfs_root
+    name: fstab_mount_mkfs_root
+    fsck-passno: 1
+    options: defaults
+
+- mkfs:
+    base: root
+    name: mkfs_root
+    type: ext4
diff --git a/diskimage_builder/block_device/tests/config/multi_key_node.yaml b/diskimage_builder/block_device/tests/config/multi_key_node.yaml
new file mode 100644
index 000000000..ef4608370
--- /dev/null
+++ b/diskimage_builder/block_device/tests/config/multi_key_node.yaml
@@ -0,0 +1,8 @@
+- mkfs:
+    name: root_fs
+    base: root_part
+    type: xfs
+  mount:
+    name: mount_root_fs
+    base: root_fs
+    mount_point: /
\ No newline at end of file
diff --git a/diskimage_builder/block_device/tests/config/simple_graph.yaml b/diskimage_builder/block_device/tests/config/simple_graph.yaml
index e1eda1467..59c0aeeb5 100644
--- a/diskimage_builder/block_device/tests/config/simple_graph.yaml
+++ b/diskimage_builder/block_device/tests/config/simple_graph.yaml
@@ -1,6 +1,7 @@
 - mkfs:
     name: root_fs
     base: root_part
+    type: xfs
 
 - mount:
     name: mount_root_fs
diff --git a/diskimage_builder/block_device/tests/config/simple_tree.yaml b/diskimage_builder/block_device/tests/config/simple_tree.yaml
index d44f319b1..67add797e 100644
--- a/diskimage_builder/block_device/tests/config/simple_tree.yaml
+++ b/diskimage_builder/block_device/tests/config/simple_tree.yaml
@@ -1,5 +1,6 @@
 - mkfs:
     name: root_fs
     base: root_part
+    type: xfs
     mount:
       mount_point: /
\ No newline at end of file
diff --git a/diskimage_builder/block_device/tests/test_config.py b/diskimage_builder/block_device/tests/test_config.py
index af2ee9ada..911deb709 100644
--- a/diskimage_builder/block_device/tests/test_config.py
+++ b/diskimage_builder/block_device/tests/test_config.py
@@ -69,12 +69,22 @@ class TestGraphGeneration(TestConfig):
 class TestConfigParsing(TestConfig):
     """Test parsing config file into a graph"""
 
+    # test an entry in the config not being a valid plugin
     def test_config_bad_plugin(self):
         config = self.load_config_file('bad_plugin.yaml')
         self.assertRaises(BlockDeviceSetupException,
                           config_tree_to_graph,
                           config)
 
+    # test a config that has multiple keys for a top-level entry
+    def test_config_multikey_node(self):
+        config = self.load_config_file('multi_key_node.yaml')
+        self.assertRaisesRegexp(BlockDeviceSetupException,
+                                "Config entry top-level should be a single "
+                                "dict:",
+                                config_tree_to_graph,
+                                config)
+
     # a graph should remain the same
     def test_graph(self):
         graph = self.load_config_file('simple_graph.yaml')
@@ -106,6 +116,23 @@ class TestConfigParsing(TestConfig):
 
 class TestCreateGraph(TestGraphGeneration):
 
+    # Test a graph with bad edge pointing to an invalid node
+    def test_invalid_missing(self):
+        config = self.load_config_file('bad_edge_graph.yaml')
+        self.assertRaisesRegexp(BlockDeviceSetupException,
+                                "Edge not defined: this_is_not_a_node",
+                                self.bd.create_graph,
+                                config, self.fake_default_config)
+
+    # Test a graph with bad edge pointing to an invalid node
+    def test_duplicate_name(self):
+        config = self.load_config_file('duplicate_name.yaml')
+        self.assertRaisesRegexp(BlockDeviceSetupException,
+                                "Duplicate node name: "
+                                "this_is_a_duplicate",
+                                self.bd.create_graph,
+                                config, self.fake_default_config)
+
     # Test digraph generation from deep_graph config file
     def test_deep_graph_generator(self):
         config = self.load_config_file('deep_graph.yaml')
diff --git a/requirements.txt b/requirements.txt
index 4419fef0c..15651bb3a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@
 # of appearance. Changing the order has an impact on the overall integration
 # process, which may cause wedges in the gate later.
 Babel!=2.4.0,>=2.3.4 # BSD
+networkx>=1.10 # BSD
 pbr!=2.1.0,>=2.0.0 # Apache-2.0
 PyYAML>=3.10.0 # MIT
 flake8<2.6.0,>=2.5.4 # MIT