Fix column parser (r10)

The list-table column parser did not dynamically recalculate indentation when rebuilding tables. This is now fixed.

Change-Id: I4ef1013454aa1d0252f5bf5d9fc8f33a44a021e7
Signed-off-by: Ron Stone <ronald.stone@windriver.com>
This commit is contained in:
Ron Stone
2025-06-10 11:31:02 +00:00
parent c9f9f2809f
commit 10b385e726
2 changed files with 159 additions and 131 deletions

View File

@@ -3,151 +3,175 @@ import re
import sys
def parse_meta_directives(lines):
    """Scan *lines* for the table-filtering meta directives.

    Recognized directives (anywhere in the file):
      ``:remove-column-from-html-table:`` comma-separated column names
      ``:remove-column-emptied-row:``     ``1`` to drop rows emptied by removal
      ``:docs-build-context:``            build context this file opts in to

    Args:
        lines: Iterable of source lines.

    Returns:
        Dict with any of the keys ``remove_column`` (str),
        ``remove_emptied_row`` (bool) and ``context`` (str).
    """
    directives = {}
    for line in lines:
        # split(":", 2)[2] keeps everything after the closing ':' of the
        # directive name, including values that themselves contain colons.
        if ":remove-column-from-html-table:" in line:
            directives["remove_column"] = line.split(":", 2)[2].strip()
        if ":remove-column-emptied-row:" in line:
            directives["remove_emptied_row"] = line.split(":", 2)[2].strip() == "1"
        if ":docs-build-context:" in line:
            directives["context"] = line.split(":", 2)[2].strip()
    return directives
def extract_table_blocks(lines):
    """Collect every ``.. list-table::`` block from *lines*.

    A block runs from the directive line through its indented body,
    including interior blank lines, and ends at the first non-indented,
    non-blank line.

    Args:
        lines: Iterable of source lines.

    Returns:
        List of blocks; each block is the list of its original lines.
    """
    blocks = []
    current = []
    inside = False
    for line in lines:
        if line.strip().startswith(".. list-table::"):
            inside = True
            current = [line]
        # NOTE: parentheses are required here.  The original
        # `inside and a or b` grouping made blank lines OUTSIDE any table
        # take this branch and append to the previously stored block.
        elif inside and (line.startswith(" ") or line.strip() == ""):
            current.append(line)
        elif inside:
            # First unindented, non-blank line terminates the block.
            blocks.append(current)
            inside = False
    if inside:
        # File ended while still inside a table.
        blocks.append(current)
    return blocks
def split_table_row(row_lines):
    """Split a table row (beginning with ``*``) into a list of cells.

    Args:
        row_lines: The source lines of one ``* -`` row.

    Returns:
        List of cells; each cell is a list of its (stripped) content lines.
    """
    cells = []
    current_cell = []
    for line in row_lines:
        if re.match(r'^\s*\*\s+-', line):  # first cell in the row
            # Everything after the '* - ' marker is the cell's first line.
            parts = re.split(r'\s*\*\s+-\s*', line, maxsplit=1)
            current_cell = [parts[1]]
        elif re.match(r'^\s*-\s+', line):  # a new cell starts
            cells.append(current_cell)
            # strip()[2:] drops the leading '- ' marker.
            current_cell = [line.strip()[2:]]
        else:
            # Continuation line of the current cell.
            current_cell.append(line.strip())
    cells.append(current_cell)
    return cells
def join_cells(cells, base_indent="   "):
    """Reconstruct a list-table row from cell lists.

    Args:
        cells: List of cells as produced by ``split_table_row`` — each cell
            is a list of content lines.
        base_indent: Leading whitespace of the table's ``* -`` markers.
            Defaults to three spaces for backward compatibility with
            callers that omit it.

    Returns:
        The reassembled source lines for the row.
    """
    lines = [f"{base_indent}* - " + cells[0][0]]
    # Continuation lines align under the cell text ('* - ' is 4 chars wide).
    for cont in cells[0][1:]:
        lines.append(base_indent + "    " + cont)
    for cell in cells[1:]:
        # Subsequent cells use the '  - ' marker at the same alignment.
        lines.append(base_indent + "  - " + cell[0])
        for cont in cell[1:]:
            lines.append(base_indent + "    " + cont)
    return lines
def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Remove one or more columns from a single reST list-table block.

    Args:
        table_lines: The lines of one ``.. list-table::`` block.
        cols_to_remove_str: Comma-separated header names of the columns
            to drop.
        remove_empty_row: If True, rows left entirely empty after the
            columns are removed are dropped as well.

    Returns:
        The rewritten block lines, or ``table_lines`` unchanged when none
        of the requested columns appear in the header (first) row.
    """
    # Parse comma-separated column names.
    cols_to_remove = [col.strip() for col in cols_to_remove_str.split(',')]
    processed = []    # non-row lines: directive, options, blanks
    table_rows = []   # each entry is the list of lines of one '* -' row
    buffer = []
    # Fallback indent in case the block contains no rows; otherwise it is
    # recalculated from the rows so rebuilt tables keep their own indent.
    indentation = "   "
    for line in table_lines:
        if re.match(r'\s*\*\s+-', line):
            # Row start: capture the table's actual indentation.
            indentation = re.match(r'(\s*)\*\s+-', line).group(1)
            if buffer:
                table_rows.append(buffer)
            buffer = [line]
        elif buffer != [] and (line.strip() == "" or re.match(r'^\s*(-|[^*].*)$', line)):
            # Cell or continuation line belonging to the current row.
            buffer.append(line)
        else:
            if buffer:
                table_rows.append(buffer)
                buffer = []
            processed.append(line)
    if buffer:
        table_rows.append(buffer)

    # Locate the requested columns in the header (first) row.
    header_indices = []
    for i, row in enumerate(table_rows):
        if i == 0:
            cells = split_table_row(row)
            flat_cells = [' '.join(c).strip() for c in cells]
            for col_name in cols_to_remove:
                if col_name in flat_cells:
                    header_indices.append(flat_cells.index(col_name))
            if not header_indices:
                return table_lines  # nothing matched: don't modify
            # Remove right-to-left so earlier indices stay valid.
            header_indices.sort(reverse=True)
            break
    if not header_indices:
        return table_lines  # no rows at all: don't modify

    # Remove the columns from each row.
    new_rows = []
    for row in table_rows:
        cells = split_table_row(row)
        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]
        # Optionally drop rows emptied by the column removal.
        if remove_empty_row and all(not ''.join(cell).strip() for cell in cells):
            continue
        new_rows.append(join_cells(cells, indentation))
    return processed + [""] + [line for row in new_rows for line in row]
def process_file(path):
    """Rewrite *path* in place, removing the configured table columns.

    Does nothing unless the file declares a
    ``:remove-column-from-html-table:`` directive whose
    ``:docs-build-context:`` matches the ``DOCS_BUILD_CONTEXT``
    environment variable.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return

    # .get() so a file without a :docs-build-context: directive skips
    # gracefully instead of raising KeyError.
    if directives.get("context") != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", directives.get("context"), "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip().startswith(".. list-table::"):
            # Find the matching table block and replace it.
            for block in table_blocks:
                if lines[i:i + len(block)] == block:
                    processed = process_table(
                        block,
                        directives["remove_column"],
                        directives.get("remove_emptied_row", False)
                    )
                    output_lines.extend(processed)
                    i += len(block)
                    break
            else:
                # No recorded block matched here; keep the line and advance
                # so the loop cannot spin forever.
                output_lines.append(line)
                i += 1
        else:
            output_lines.append(line)
            i += 1

    with open(path, 'w', encoding='utf-8') as f:
        # Guarantee exactly one trailing newline per logical line.
        f.writelines(l + ("\n" if not l.endswith("\n") else "") for l in output_lines)
# not used currently. We get a list of files from grep and loop
def scan_dir(directory):
    """Recursively run process_file() on every .rst file under *directory*."""
    for root, _, files in os.walk(directory):
        for name in files:
            if name.endswith(".rst"):
                process_file(os.path.join(root, name))
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # The script processes a single file (grep supplies the file list);
        # the old usage text incorrectly said "<directory>".
        print("Usage: python remove-columns.py <file>")
        sys.exit(1)
    process_file(sys.argv[1])
    # scan_dir(sys.argv[1])

12
tox.ini
View File

@@ -11,8 +11,10 @@ setenv = VIRTUAL_ENV={envdir}
OS_TEST_TIMEOUT=60
LC_ALL=C
DOCS_BUILD_CONTEXT=starlingx
drop_table_cols = {env:DROP_TABLE_COLS:bash -c 'for f in $(grep -rl -e :remove-column-from-html-table: doc/source/*); do python3 remove-list-columns.py "$f" -o "$f"; python3 remove-grid-columns.py "$f" -o "$f"; done'}
drop_table_rows = {env:DROP_TABLE_ROWS:bash -c 'for f in $(grep -rl -e :remove-empty-table-rows: doc/source/*); do python3 remove_empty-list_rows.py "$f" -o "$f"; python3 remove_empty-grid_rows.py "$f" -o "$f"; done'}
drop_table_cols_list = {env:DROP_TABLE_COLS_LIST:bash -c 'for f in $(grep -rl -e :remove-column-from-html-table: doc/source/* --exclude-dir="doc/source/contributor"); do python3 remove-list-columns.py "$f"; done'}
drop_table_cols_grid = {env:DROP_TABLE_COLS_GRID:bash -c 'for f in $(grep -rl -e :remove-column-from-html-table: doc/source/* --exclude-dir="doc/source/contributor"); do python3 remove-grid-columns.py "$f" -o "$f"; done'}
drop_table_rows_list = {env:DROP_TABLE_ROWS_LIST:bash -c 'for f in $(grep -rl -e :remove-empty-table-rows: doc/source/* --exclude-dir="doc/source/contributor"); do python3 remove_empty-list_rows.py "$f" -o "$f"; done'}
drop_table_rows_grid = {env:DROP_TABLE_ROWS_GRID:bash -c 'for f in $(grep -rl -e :remove-empty-table-rows: doc/source/* --exclude-dir="doc/source/contributor"); do python3 remove_empty-grid_rows.py "$f" -o "$f"; done'}
deps = -r{toxinidir}/test-requirements.txt
[testenv:prebuild-docs]
@@ -29,8 +31,10 @@ commands =
bash ./fetch-ports-files.sh
python py_2_xlsx.py tmp/platform_firewall.py tmp/constants.py tmp/FW_PORTS.xlsx
python xlst_2_csv.py tmp/FW_PORTS.xlsx doc/source/shared/FW_PORTS.csv --columns Source Port Protocol Network Desc HTTPS Note _stx --sort_orders Port=asc --filters _stx=y
{[testenv]drop_table_cols}
{[testenv]drop_table_rows}
{[testenv]drop_table_cols_list}
{[testenv]drop_table_cols_grid}
{[testenv]drop_table_rows_list}
{[testenv]drop_table_rows_grid}
[testenv:postbuild-docs]
commands =