Search all dependencies to check which books to build

So far we only checked one level of depth for inclusion of modified files.
This let to missed book builds.

Now, we check all dependencies, even over multiple includes.

Rename args to silence pylint warnings.

Change-Id: I78df8803f17cdb736230c2137277bb3886071e85
This commit is contained in:
Andreas Jaeger 2013-10-07 15:20:21 +02:00
parent cc1463a6ce
commit 638f7cb478

View File

@ -147,8 +147,8 @@ def only_www_touched():
"""Check whether only files in www directory are touched"""
try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split()
git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e)
sys.exit(1)
@ -169,8 +169,8 @@ def ha_guide_touched():
"""Check whether files in high-availability-guide directory are touched"""
try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split()
git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e)
sys.exit(1)
@ -193,8 +193,8 @@ def check_modified_affects_all(rootdir, verbose):
os.chdir(rootdir)
try:
args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(args).strip().split()
git_args = ["git", "diff", "--name-only", "HEAD~1", "HEAD"]
modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e)
sys.exit(1)
@ -222,10 +222,11 @@ def get_modified_files(rootdir, filtering=None):
os.chdir(rootdir)
try:
args = ["git", "diff", "--name-only", "--relative", "HEAD~1", "HEAD"]
git_args = ["git", "diff", "--name-only", "--relative", "HEAD~1",
"HEAD"]
if filtering is not None:
args.append(filtering)
modified_files = check_output(args).strip().split()
git_args.append(filtering)
modified_files = check_output(git_args).strip().split()
except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e)
sys.exit(1)
@ -391,7 +392,6 @@ def validate_all_files(rootdir, exceptions, verbose,
elif check_niceness:
print("Checking niceness of all xml files...")
for root, dirs, files in os.walk(rootdir):
# Don't descend into 'target' subdirectories
try:
@ -485,23 +485,41 @@ def build_book(book):
return (base_book, result, output, returncode)
def build_affected_books(rootdir, book_exceptions, file_exceptions,
verbose, force=False, voting=True):
"""Build all the books which are affected by modified files.
def is_book_master(filename):
"""Returns True if filename is one of the special filenames used for the
book master files.
Looks for all directories with "pom.xml" and checks if a
XML file in the directory includes a modified file. If at least
one XML file includes a modified file the method calls
"mvn clean generate-sources" in that directory.
This will throw an exception if a book fails to build
We do not parse pom.xml for the includes directive to determine
the top-level files and thus have to use a heuristic.
"""
return ((filename.startswith(('bk-', 'bk_', 'st-'))
and filename.endswith('.xml')) or
filename == 'openstack-glossary.xml')
def find_affected_books(rootdir, book_exceptions, verbose,
force):
"""Check which books are affected by modified files.
Returns a set with books.
"""
modified_files = get_modified_files(rootdir)
modified_files = map(lambda x: os.path.abspath(x), modified_files)
build_all_books = force or check_modified_affects_all(rootdir, verbose)
affected_books = []
books = []
book_root = rootdir
books = []
affected_books = set()
build_all_books = force or check_modified_affects_all(rootdir, verbose)
# Dictionary that contains a set of files.
# The key is a filename, the set contains files that include this file.
included_by = {}
# Dictionary with books and their bk*.xml files
book_bk = {}
# 1. Iterate over whole tree and analyze include files.
# This updates included_by, book_bk and books.
for root, dirs, files in os.walk(rootdir):
# Don't descend into 'target' subdirectories
try:
@ -519,59 +537,77 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions,
books.append(root)
book_root = root
os.chdir(root)
# No need to check single books if we build all, we just
# collect list of books
if build_all_books:
continue
# ha-guide uses asciidoc which we do not track.
# Just check whether any file is touched in that directory
if root.endswith('doc/high-availability-guide'):
if ha_guide_touched():
affected_books.append(book_root)
# We can scan only for depth of one of inclusion
# therefore skip the common directory since there's no
# book build in it.
elif not root.endswith('doc/common'):
for f in files:
if (f.endswith('.xml') and
f not in file_exceptions):
path = os.path.abspath(os.path.join(root, f))
# If the file itself is modified, build the book
if path in modified_files:
affected_books.append(book_root)
break
affected_books.add(book_root)
# If the file itself is modified, build the book.
# Note this is an optimization in most cases but
# needed for bk-*.xml since those are included by
# pom.xml and pom.xml is not checked for
# modification of included files.
doc = etree.parse(path)
for f in files:
f_base = os.path.basename(f)
f_abs = os.path.abspath(os.path.join(root, f))
if is_book_master(f_base):
book_bk[f_abs] = book_root
if (f.endswith('.xml') and
f != "pom.xml" and
f != "ha-guide-docinfo.xml"):
doc = etree.parse(f_abs)
for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref')
href_abs = os.path.abspath(os.path.join(root, href))
if href_abs in included_by:
included_by[href_abs].add(f_abs)
else:
included_by[href_abs] = set([f_abs])
# Check for inclusion of files as part of imagedata
for node in doc.findall(
'//{http://docbook.org/ns/docbook}imagedata'):
href = node.get('fileref')
if (f not in file_exceptions and
os.path.abspath(href) in modified_files):
affected_books.append(book_root)
break
ns = {"xi": "http://www.w3.org/2001/XInclude"}
for node in doc.xpath('//xi:include', namespaces=ns):
href = node.get('href')
href_abs = os.path.abspath(os.path.join(root, href))
if href_abs in included_by:
included_by[href_abs].add(f_abs)
else:
included_by[href_abs] = set([f_abs])
if book_root in affected_books:
break
if not build_all_books:
# Generate list of modified_files
modified_files = get_modified_files(rootdir)
modified_files = map(lambda x: os.path.abspath(x), modified_files)
# Check for inclusion of files as part of xi:include
ns = {"xi": "http://www.w3.org/2001/XInclude"}
for node in doc.xpath('//xi:include', namespaces=ns):
href = node.get('href')
if (f not in file_exceptions and
os.path.abspath(href) in modified_files):
affected_books.append(book_root)
break
if book_root in affected_books:
break
# 2. Find all modified files and where they are included
# List of files that we have to iterate over, these are affected
# by some modification
new_files = modified_files
# All files that are affected (either directly or indirectly)
affected_files = set(modified_files)
# 3. Iterate over files that have includes on modified files
# and build a closure - the set of all files (affected_files)
# that have a path to a modified file via includes.
while len(new_files) > 0:
new_files_to_check = new_files
new_files = []
for f in new_files_to_check:
# Skip bk*.xml files
if is_book_master(os.path.basename(f)):
book_modified = book_bk[f]
if book_modified not in affected_books:
affected_books.add(book_modified)
continue
if f not in included_by:
continue
for g in included_by[f]:
if g not in affected_files:
new_files.append(g)
affected_files.add(g)
if build_all_books:
print("Building all books.")
@ -580,6 +616,23 @@ def build_affected_books(rootdir, book_exceptions, file_exceptions,
else:
print("No books are affected by modified files. Building all books.")
return books
def build_affected_books(rootdir, book_exceptions,
verbose, force=False, voting=True):
"""Build all the books which are affected by modified files.
Looks for all directories with "pom.xml" and checks if a
XML file in the directory includes a modified file. If at least
one XML file includes a modified file the method calls
"mvn clean generate-sources" in that directory.
This will throw an exception if a book fails to build
"""
books = find_affected_books(rootdir, book_exceptions,
verbose, force)
maxjobs = multiprocessing.cpu_count()
# Jenkins fails sometimes with errors if too many jobs run, artificially
# limit to 4 for now.
@ -637,7 +690,7 @@ def main(args):
args.check_niceness, args.non_voting)
if args.check_build:
build_affected_books(args.path, BOOK_EXCEPTIONS, FILE_EXCEPTIONS,
build_affected_books(args.path, BOOK_EXCEPTIONS,
args.verbose, args.force, args.non_voting)
@ -647,8 +700,8 @@ def default_root():
The current working directory must be inside of the openstack-manuals
repository for this method to succeed"""
try:
args = ["git", "rev-parse", "--show-toplevel"]
gitroot = check_output(args).rstrip()
git_args = ["git", "rev-parse", "--show-toplevel"]
gitroot = check_output(git_args).rstrip()
except (subprocess.CalledProcessError, OSError) as e:
print("git failed: %s" % e)
sys.exit(1)
@ -679,8 +732,8 @@ if __name__ == '__main__':
action="store_false")
parser.add_argument("--verbose", help="Verbose execution",
action="store_true")
args = parser.parse_args()
prog_args = parser.parse_args()
if (len(sys.argv) == 1):
# No arguments given, use check-all
args.check_all = True
main(args)
prog_args.check_all = True
main(prog_args)