"""Remove named columns from reStructuredText ``list-table`` directives.

A document opts in with marker lines of the form ``:name: value``:

``:remove-column-from-html-table:``
    Comma-separated header names of the columns to drop.
``:remove-column-emptied-row:``
    ``1`` to also drop rows whose remaining cells are all empty.
``:docs-build-context:``
    Optional. When present, the file is only processed if the value equals
    the ``DOCS_BUILD_CONTEXT`` environment variable (tox.ini sets it to
    ``starlingx``).

The file is rewritten in place. tox.ini runs this script once per file
through its ``drop_table_cols_list`` setting::

    for f in $(grep -rl -e :remove-column-from-html-table: doc/source/* \
        --exclude-dir="doc/source/contributor"); do
        python3 remove-list-columns.py "$f"; done

The sibling settings ``drop_table_cols_grid``, ``drop_table_rows_list`` and
``drop_table_rows_grid`` run ``remove-grid-columns.py``,
``remove_empty-list_rows.py`` and ``remove_empty-grid_rows.py`` the same way
(those scripts take ``"$f" -o "$f"``; the row scripts grep for
``:remove-empty-table-rows:``).
"""

import os
import re
import sys

_LIST_TABLE = ".. list-table::"

# (marker text, key stored in the directives dict)
_DIRECTIVE_KEYS = (
    (":remove-column-from-html-table:", "remove_column"),
    (":remove-column-emptied-row:", "remove_emptied_row"),
    (":docs-build-context:", "context"),
)

_ROW_START = re.compile(r'^(\s*)\*\s+-')
_CELL_START = re.compile(r'^(\s*)-(?:\s|$)')
_WIDTHS = re.compile(r'^([ \t]*:widths:[ \t]*)(.*?)(\s*)$')


def parse_meta_directives(lines):
    """Collect the column-removal marker lines found in *lines*.

    Returns a dict that may contain ``remove_column`` (str),
    ``remove_emptied_row`` (bool) and ``context`` (str). A later marker of
    the same kind overrides an earlier one.
    """
    directives = {}
    for line in lines:
        for marker, key in _DIRECTIVE_KEYS:
            if marker not in line:
                continue
            # Take everything after the marker itself, so a colon earlier
            # on the line cannot shift the value.
            value = line.split(marker, 1)[1].strip()
            directives[key] = (value == "1") if key == "remove_emptied_row" else value
    return directives


def extract_table_blocks(lines):
    """Return every ``list-table`` block in *lines* as a list of lines.

    A block starts at a ``.. list-table::`` line and continues over the
    following indented or blank lines. A new directive line closes the
    block that is currently open.
    """
    blocks = []
    current = None
    for line in lines:
        if line.strip().startswith(_LIST_TABLE):
            if current is not None:
                blocks.append(current)
            current = [line]
        elif current is None:
            # Outside any table: nothing to collect. Blank lines here must
            # not be appended to a block that was already closed.
            continue
        elif line.startswith(" ") or not line.strip():
            current.append(line)
        else:
            blocks.append(current)
            current = None
    if current is not None:
        blocks.append(current)
    return blocks


def _cell_text(line, content_col):
    """Return a continuation line's text, keeping indentation beyond *content_col*."""
    if (content_col is not None and len(line) > content_col
            and not line[:content_col].strip()):
        return line[content_col:].rstrip()
    return line.strip()


def split_table_row(row_lines):
    """Split a table row (beginning with ``* -``) into a list of cells.

    Each cell is a list of text lines. Only a ``-`` in the same column as
    the dash of the opening ``* -`` starts a new cell; a deeper ``-`` is a
    bullet inside the current cell. Continuation lines keep any indentation
    beyond the cell's content column.
    """
    cells = []
    current_cell = []
    dash_col = None
    for line in row_lines:
        row_start = _ROW_START.match(line) if dash_col is None else None
        if row_start:
            # First cell in row
            dash_col = row_start.end() - 1
            current_cell = [line[row_start.end():].strip()]
            continue
        cell_start = _CELL_START.match(line)
        if cell_start and (dash_col is None or cell_start.end(1) == dash_col):
            # New cell
            cells.append(current_cell)
            current_cell = [line[cell_start.end(1) + 1:].strip()]
        else:
            content_col = None if dash_col is None else dash_col + 2
            current_cell.append(_cell_text(line, content_col))
    cells.append(current_cell)
    return cells


def join_cells(cells, base_indent):
    """Rebuild the lines of one list-table row from its cells.

    *base_indent* is the whitespace placed before the row's ``*``. The
    returned lines carry no trailing whitespace or newline. An empty
    *cells* list yields an empty list.
    """
    first_prefix = base_indent + "* - "
    next_prefix = base_indent + "  - "
    cont_prefix = base_indent + "    "
    lines = []
    for index, cell in enumerate(cells):
        parts = cell or [""]
        prefix = first_prefix if index == 0 else next_prefix
        lines.append((prefix + parts[0]).rstrip())
        lines.extend((cont_prefix + text).rstrip() for text in parts[1:])
    return lines


def _group_rows(table_lines):
    """Split a table block into (preamble lines, rows, row indentation).

    Rows are the ``* -`` lines indented like the first such line, each
    followed by its continuation lines; deeper ``* -`` lines stay inside
    the row they appear in.
    """
    preamble = []
    rows = []
    indentation = None
    for line in table_lines:
        start = _ROW_START.match(line)
        if start and (indentation is None or start.group(1) == indentation):
            indentation = start.group(1)
            rows.append([line])
        elif rows:
            rows[-1].append(line)
        else:
            preamble.append(line)
    return preamble, rows, indentation


def _split_trailing_blanks(row):
    """Return (*row* without trailing blank lines, number of blank lines removed)."""
    end = len(row)
    while end > 0 and not row[end - 1].strip():
        end -= 1
    return row[:end], len(row) - end


def _drop_widths(line, indices, n_cols):
    """Remove the entries at *indices* from a numeric ``:widths:`` option line.

    Any other line, and a widths list that is not one integer per column
    (for example ``auto``), is returned unchanged.
    """
    match = _WIDTHS.match(line)
    if not match:
        return line
    values = match.group(2).replace(',', ' ').split()
    if len(values) != n_cols or not all(v.isdigit() for v in values):
        return line
    kept = [v for i, v in enumerate(values) if i not in indices]
    if not kept:
        return line
    return match.group(1) + ' '.join(kept) + match.group(3)


def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Return *table_lines* with the named columns removed.

    *cols_to_remove_str* is a comma-separated list of header names, matched
    against the first row of the table. When none of them is present the
    original *table_lines* object is returned untouched. With
    *remove_empty_row*, rows left with only empty cells are dropped.
    Rebuilt row lines have no trailing newline; the caller adds them.
    """
    names = {' '.join(col.split()) for col in cols_to_remove_str.split(',')}
    names.discard('')

    preamble, rows, indentation = _group_rows(table_lines)
    if not rows:
        return table_lines  # Don't modify

    header_body, _ = _split_trailing_blanks(rows[0])
    header_cells = split_table_row(header_body)
    flat_cells = [' '.join(' '.join(cell).split()) for cell in header_cells]

    # A set, so a name listed twice cannot delete two columns; descending
    # order so deleting right-to-left keeps the remaining indices valid.
    header_indices = sorted(
        {flat_cells.index(name) for name in names if name in flat_cells},
        reverse=True,
    )
    if not header_indices:
        return table_lines  # Don't modify

    n_cols = len(header_cells)
    new_lines = [_drop_widths(line, header_indices, n_cols) for line in preamble]
    if new_lines and new_lines[-1].strip():
        new_lines.append("")  # a blank line must separate options from rows

    for row in rows:
        # Trailing blank lines separate rows (or the table from what
        # follows); keep them even when the last column is removed.
        body, blanks = _split_trailing_blanks(row)
        cells = split_table_row(body)
        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]

        emptied = remove_empty_row and all(
            not ''.join(cell).strip() for cell in cells
        )
        if not cells or emptied:
            if blanks and new_lines and new_lines[-1].strip():
                new_lines.append("")
            continue

        new_lines.extend(join_cells(cells, indentation))
        new_lines.extend([""] * blanks)

    return new_lines


def process_file(path):
    """Rewrite the file at *path* with the requested columns removed.

    Does nothing when the file has no ``:remove-column-from-html-table:``
    marker, or when it declares a ``:docs-build-context:`` that differs
    from the ``DOCS_BUILD_CONTEXT`` environment variable. A file with no
    context marker is processed in every context.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return
    context = directives.get("context")
    if context is not None and context != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", context, "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        matched = None
        if line.strip().startswith(_LIST_TABLE):
            # Find the table block that starts here
            for block in table_blocks:
                if lines[i:i + len(block)] == block:
                    matched = block
                    break
        if matched is None:
            # Always advance, even on a directive line with no matching
            # block, so the loop cannot stall.
            output_lines.append(line)
            i += 1
            continue
        output_lines.extend(process_table(
            matched,
            directives["remove_column"],
            directives.get("remove_emptied_row", False),
        ))
        i += len(matched)

    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(l if l.endswith("\n") else l + "\n" for l in output_lines)


# Not used currently. We get a list of files from grep and loop.
def scan_dir(directory):
    """Run process_file on every ``.rst`` file below *directory*."""
    for root, _, files in os.walk(directory):
        for name in files:
            if name.endswith(".rst"):
                process_file(os.path.join(root, name))


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python remove-list-columns.py <file.rst>", file=sys.stderr)
        sys.exit(1)
    process_file(sys.argv[1])