workshops/QueComanTierra/build_pptx.py

#!/usr/bin/env python3
"""
Generate slides.pptx from slides.md using the Diplomados Ciberseguridad template.
Backgrounds are copied directly from the template's slides.
"""
from pptx import Presentation
from pptx.util import Pt, Emu
from pptx.dml.color import RGBColor
from lxml import etree
import copy, re, zipfile, io, tempfile, os

# Relative paths of the template deck, the markdown source and the generated deck.
TEMPLATE = "../../Red Team.pptx"
SOURCE   = "slides.md"
OUTPUT   = "slides.pptx"

# Layout indices in the template (the layouts carry Spanish names)
L_TITLE   = 0  # "Diapositiva de titulo" (title slide)      (idx 0=ctrTitle, 1=subTitle)
L_CONTENT = 1  # "Titulo y objetos" (title and content)     (idx 0=title,    1=content)
L_SECTION = 2  # "Encabezado de seccion" (section header)   (idx 0=title,    1=body)

# Template slides used as background sources (0-based indices into the deck)
# slide1=cover, slide3=content, slide5=section divider
BG_TITLE   = 0  # image1.jpg — cover with red binary background
BG_CONTENT = 2  # image3.jpg — regular slide with logos and red bar
BG_SECTION = 4  # image5.jpg — red gradient divider

# XML namespaces (PresentationML, DrawingML, relationships) and the image
# relationship type, used when reading or editing slide XML directly.
PNS = 'http://schemas.openxmlformats.org/presentationml/2006/main'
ANS = 'http://schemas.openxmlformats.org/drawingml/2006/main'
RNS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
IMG_REL = f'{RNS}/image'

# ─── template limpio ─────────────────────────────────────────────────────────

def make_clean_template(src):
    """Return a BytesIO with a copy of the package at ``src`` minus its slides.

    Every ``ppt/slides/slideN.xml`` entry and its ``.rels`` companion is left
    out of the copy, and the three index parts that mention slides are
    rewritten so they no longer do:

    * ``ppt/presentation.xml``: the ``sldIdLst`` element is emptied.
    * ``ppt/_rels/presentation.xml.rels``: slide relationships are removed.
    * ``[Content_Types].xml``: slide content-type overrides are removed.

    All other entries (masters, layouts, media, ...) are copied unchanged.
    The returned buffer is rewound to position 0.
    """
    slide_part = re.compile(r'^ppt/slides/slide\d+\.xml$')
    slide_part_rels = re.compile(r'^ppt/slides/_rels/slide\d+\.xml\.rels$')
    slide_content_type = 'application/vnd.openxmlformats-officedocument.presentationml.slide+xml'
    slide_rel_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide'
    rels_ns = 'http://schemas.openxmlformats.org/package/2006/relationships'
    pml_ns = 'http://schemas.openxmlformats.org/presentationml/2006/main'
    ct_ns = 'http://schemas.openxmlformats.org/package/2006/content-types'

    def serialize(root):
        # Same serialization settings for every rewritten part.
        return etree.tostring(root, xml_declaration=True,
                              encoding='UTF-8', standalone=True)

    def without_slide_ids(xml_bytes):
        # Empty the slide id list of presentation.xml, if it has one.
        root = etree.fromstring(xml_bytes)
        id_list = root.find(f'{{{pml_ns}}}sldIdLst')
        if id_list is not None:
            id_list.clear()
        return serialize(root)

    def without_matching(xml_bytes, child_tag, attr, unwanted):
        # Drop the direct children named child_tag whose attr equals unwanted.
        root = etree.fromstring(xml_bytes)
        doomed = [el for el in root.findall(child_tag) if el.get(attr) == unwanted]
        for el in doomed:
            root.remove(el)
        return serialize(root)

    rewriters = {
        'ppt/presentation.xml': without_slide_ids,
        'ppt/_rels/presentation.xml.rels': lambda blob: without_matching(
            blob, f'{{{rels_ns}}}Relationship', 'Type', slide_rel_type),
        '[Content_Types].xml': lambda blob: without_matching(
            blob, f'{{{ct_ns}}}Override', 'ContentType', slide_content_type),
    }

    out = io.BytesIO()
    with zipfile.ZipFile(src, 'r') as zin, \
         zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED) as zout:
        for entry in zin.infolist():
            path = entry.filename
            if slide_part.match(path) or slide_part_rels.match(path):
                continue          # slide parts and their rels are not copied
            payload = zin.read(path)
            rewrite = rewriters.get(path)
            if rewrite is not None:
                payload = rewrite(payload)
            zout.writestr(entry, payload)

    out.seek(0)
    return out

# ─── background copy ──────────────────────────────────────────────────────────

def copy_background(new_slide, template_slide):
    """Clone the picture background of ``template_slide`` onto ``new_slide``.

    The template slide's ``<p:bg>`` element is deep-copied, its ``<a:blip>``
    is re-pointed at a relationship created on ``new_slide`` for the same
    image part, and the copy is inserted as the first child of the new
    slide's ``<p:cSld>`` (replacing any ``<p:bg>`` already there).

    Does nothing when the template slide has no ``<p:cSld>``, no ``<p:bg>``,
    no ``<a:blip>`` inside the background, or no embed id on that blip.
    """
    bg_tag = f'{{{PNS}}}bg'
    csld_tag = f'{{{PNS}}}cSld'
    blip_path = f'.//{{{ANS}}}blip'
    embed_attr = f'{{{RNS}}}embed'

    src_csld = template_slide._element.find(csld_tag)
    if src_csld is None:
        return
    src_bg = src_csld.find(bg_tag)
    if src_bg is None:
        return
    src_blip = src_bg.find(blip_path)
    if src_blip is None:
        return
    src_rid = src_blip.get(embed_attr)
    if not src_rid:
        return

    # Reuse the template's image part and relate it to the new slide.
    # NOTE(review): the part belongs to the template's package — confirm its
    # partname cannot clash with a media part of the target deck on save.
    picture_part = template_slide.part.related_part(src_rid)
    dst_rid = new_slide.part.relate_to(picture_part, IMG_REL)

    # Work on a copy so the template slide's XML is left untouched.
    bg_clone = copy.deepcopy(src_bg)
    bg_clone.find(blip_path).set(embed_attr, dst_rid)

    # The background goes first inside cSld, ahead of the shape tree.
    dst_csld = new_slide._element.find(csld_tag)
    stale_bg = dst_csld.find(bg_tag)
    if stale_bg is not None:
        dst_csld.remove(stale_bg)
    dst_csld.insert(0, bg_clone)

# ─── helpers de texto ─────────────────────────────────────────────────────────

def clean(text):
    """Strip inline markdown markers from ``text`` and trim the result.

    Removes, in this order: ``**bold**`` markers, ``*italic*`` markers,
    backtick code markers, and a leading ``>`` blockquote marker (with one
    optional following whitespace character) at the start of each line.
    """
    substitutions = (
        (r'\*\*(.*?)\*\*', r'\1', 0),
        (r'\*(.*?)\*',     r'\1', 0),
        (r'`(.*?)`',       r'\1', 0),
        (r'^\s*>\s?',      '',    re.MULTILINE),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()

def fill_placeholder(ph, lines):
    """Render markdown-like ``lines`` as paragraphs of placeholder ``ph``.

    The placeholder's text frame is cleared and word wrap is enabled, then
    each line is handled as follows:

    * A line starting with three backticks toggles code mode and is not
      rendered.
    * Outside code, blank lines and table delimiter rows (``|---|:--:|``)
      are skipped. Inside code every line is kept, blank ones included, so
      fenced content containing pipes or dashes is never dropped.
    * Bullets (``-``, ``*`` or ``N.`` followed by a space) become paragraphs
      at level ``indent // 2 + 1``, capped at 4.
    * Code lines get their bullet removed and are written left-stripped in
      Courier New 13 pt, colour C00000.
    * ``##``–``####`` headings are written in bold; anything else is plain
      text. Non-code text goes through ``clean`` first.

    Args:
        ph: placeholder shape exposing ``text_frame``.
        lines: iterable of source lines without trailing newlines.
    """
    # A delimiter row consists solely of dash cells (optionally with
    # alignment colons) separated by pipes. Requiring that shape, plus at
    # least one pipe, keeps rows such as "| - | x |" from being mistaken
    # for a delimiter and silently discarded.
    separator_re = re.compile(r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?\s*$')
    bullet_re = re.compile(r'^(\s*)([-*]|\d+\.) (.+)')
    heading_re = re.compile(r'^#{2,4} (.+)')

    tf = ph.text_frame
    tf.word_wrap = True
    tf.clear()

    in_code = False
    first_p = True

    for line in lines:
        if line.startswith('```'):
            in_code = not in_code
            continue

        stripped = line.rstrip()
        if not in_code:
            if not stripped:
                continue
            if '|' in stripped and separator_re.match(stripped):
                continue

        # tf.clear() leaves one empty paragraph behind; reuse it first.
        p = tf.paragraphs[0] if first_p else tf.add_paragraph()
        first_p = False

        if in_code:
            # Drop any bullet definition on the paragraph and mark it as
            # explicitly bullet-less.
            pPr = p._p.get_or_add_pPr()
            for child in list(pPr):
                if child.tag.split('}')[-1].startswith('bu'):
                    pPr.remove(child)
            etree.SubElement(pPr, f'{{{ANS}}}buNone')
            run = p.add_run()
            run.text = stripped.lstrip()
            run.font.name = 'Courier New'
            run.font.size = Pt(13)
            run.font.color.rgb = RGBColor(0xC0, 0x00, 0x00)
            continue

        bullet = bullet_re.match(stripped)
        if bullet:
            # Two spaces of indentation per nesting step.
            indent = len(bullet.group(1)) // 2
            p.level = min(indent + 1, 4)
            run = p.add_run()
            run.text = clean(bullet.group(3))
            continue

        heading = heading_re.match(stripped)
        run = p.add_run()
        if heading:
            run.text = clean(heading.group(1))
            run.font.bold = True
        else:
            run.text = clean(stripped)

# ─── parser ───────────────────────────────────────────────────────────────────

def parse_slides(path):
    """Read the markdown deck at ``path`` and return its slide blocks.

    A leading front-matter section (``---`` ... ``---`` at the very start of
    the file) is removed, the remainder is split on lines consisting of
    ``---``, and each piece is stripped. Empty pieces are dropped.

    The file is decoded as UTF-8 explicitly rather than with the platform
    default, so accented text reads the same on every system. The
    ``utf-8-sig`` codec also discards a leading byte-order mark, which would
    otherwise stop the front matter from being recognised.

    Args:
        path: path of the markdown file.

    Returns:
        list[str]: the non-empty slide blocks, in file order.
    """
    with open(path, encoding='utf-8-sig') as f:
        raw = f.read()
    # No MULTILINE flag: '^' anchors only at the start of the file.
    raw = re.sub(r'^---\n.*?\n---\n', '', raw, flags=re.DOTALL)
    pieces = (piece.strip() for piece in re.split(r'\n---\n', raw))
    return [piece for piece in pieces if piece]

def classify(block):
    """Classify one slide block as ``(kind, title, body_lines)``.

    * First line starting with ``# ``: a ``'section'`` whose title is the
      rest of that line with ``{.center}`` removed; the body keeps the
      non-blank lines that do not start with ``{``.
    * First line starting with (or, once stripped, equal to)
      ``## {.center}``: a ``'section'`` with an empty title; the body keeps
      the non-blank lines.
    * First line starting with ``## ``: ``'content'`` titled with the rest
      of that line; the body is every remaining line, unfiltered.
    * Anything else: ``'content'`` with an empty title and all lines as body.
    """
    lines = block.split('\n')
    head, rest = lines[0], lines[1:]

    if head.startswith('# '):
        heading = head[2:].replace('{.center}', '').strip()
        kept = [ln for ln in rest if ln.strip() and not ln.startswith('{')]
        return ('section', heading, kept)

    centered = '## {.center}'
    if head.startswith(centered) or head.strip() == centered:
        return ('section', '', [ln for ln in rest if ln.strip()])

    if head.startswith('## '):
        return ('content', head[3:].strip(), rest)

    return ('content', '', lines)

# ─── tables ───────────────────────────────────────────────────────────────────

def parse_md_table(lines):
    """Separa lineas en (non_table_lines, table_rows).
    Ignora pipes dentro de code fences para no confundirlos con tablas."""
    is_table = lambda l: bool(re.match(r'\s*\|', l.strip()) and '|' in l)
    is_sep   = lambda l: bool(re.match(r'^\s*\|?[-:| ]+\|', l))

    non_table, rows = [], []
    in_code = False
    for line in lines:
        if line.startswith('```'):
            in_code = not in_code
            non_table.append(line)
            continue
        if in_code:
            non_table.append(line)
            continue
        if is_sep(line):
            continue
        if is_table(line):
            cells = [clean(c.strip()) for c in line.strip().strip('|').split('|')]
            rows.append(cells)
        else:
            non_table.append(line)
    return non_table, rows

def add_pptx_table(slide, rows, left, top, width, height):
    """Add a styled table to ``slide`` inside the given box.

    ``rows`` is a list of rows, each a list of cell strings; the first row is
    styled as the header. Rows shorter than the widest one are padded with
    empty cells. Returns ``None`` without adding anything when ``rows`` is
    empty.

    Styling: 14 pt text everywhere; the header row is bold white on dark red;
    body rows are black text on alternating light-grey / white fills.
    A two-column table is split 25 % / 75 %; otherwise every column gets
    ``width // n_cols``.
    """
    if not rows:
        return

    row_count = len(rows)
    col_count = max(map(len, rows))
    table = slide.shapes.add_table(row_count, col_count, left, top, width, height).table

    # Column widths: narrow first column for two-column tables, even split otherwise.
    if col_count == 2:
        for col_idx, share in enumerate((0.25, 0.75)):
            table.columns[col_idx].width = int(width * share)
    else:
        for col_idx in range(col_count):
            table.columns[col_idx].width = width // col_count

    dark_red = RGBColor(0x8B, 0x00, 0x00)
    white = RGBColor(0xFF, 0xFF, 0xFF)
    black = RGBColor(0x00, 0x00, 0x00)
    light_grey = RGBColor(0xF2, 0xF2, 0xF2)

    for row_idx, row in enumerate(rows):
        header = (row_idx == 0)
        if header:
            fill_color, font_color = dark_red, white
        elif row_idx % 2 == 0:
            fill_color, font_color = light_grey, black
        else:
            fill_color, font_color = white, black

        for col_idx in range(col_count):
            cell = table.cell(row_idx, col_idx)
            cell.text = row[col_idx] if col_idx < len(row) else ''
            cell.fill.solid()
            cell.fill.fore_color.rgb = fill_color
            for para in cell.text_frame.paragraphs:
                for run in para.runs:
                    run.font.size = Pt(14)
                    run.font.bold = header
                    run.font.color.rgb = font_color

# ─── builder ──────────────────────────────────────────────────────────────────

def _placeholder_box(ph):
    """Return ``(left, top, width, height)`` in EMU for placeholder ``ph``."""
    # Placeholders on a freshly added slide usually carry no <a:xfrm> of
    # their own and take their geometry from the layout. The shape
    # properties resolve that inheritance, whereas looking for an xfrm in
    # the slide XML finds nothing.
    box = (ph.left, ph.top, ph.width, ph.height)
    if any(value is None for value in box):
        # No geometry available anywhere: fall back to a fixed content area.
        return 838200, 1600200, 10515600, 4500000
    return tuple(int(value) for value in box)


def build():
    """Build ``OUTPUT`` from ``SOURCE`` using the ``TEMPLATE`` deck.

    The template is opened twice: once as-is, to take slide backgrounds
    from, and once through ``make_clean_template`` as the slide-less base
    the new deck is built on. A fixed cover slide is added first, followed
    by one slide per markdown block:

    * ``'section'`` blocks use the section layout and background; their body
      text is forced to black.
    * ``'content'`` blocks use the content layout and background. Markdown
      tables become real tables placed over the body placeholder; when the
      block also has text, the text stays in the placeholder and the table
      takes the lower 55 % of its area.

    The deck is saved to ``OUTPUT`` and a one-line summary is printed.
    """
    # Original template: only used as a source of backgrounds.
    tmpl = Presentation(TEMPLATE)
    tmpl_slides = list(tmpl.slides)

    # Clean template: no slides, used as the base of the new deck.
    prs = Presentation(make_clean_template(TEMPLATE))

    blocks = parse_slides(SOURCE)

    def add(layout_idx, bg_idx):
        # New slide on the given layout with the background of template slide bg_idx.
        slide = prs.slides.add_slide(prs.slide_layouts[layout_idx])
        copy_background(slide, tmpl_slides[bg_idx])
        return slide

    # Slide 1: cover
    slide = add(L_TITLE, BG_TITLE)
    for ph in slide.placeholders:
        idx = ph.placeholder_format.idx
        if idx == 0:
            ph.text = "Que coman tierra"
        elif idx == 1:
            ph.text = "De LOLBINs a ransomware\nDiplomados Ciberseguridad"

    # Slides from markdown
    for block in blocks:
        kind, title, body_lines = classify(block)

        if kind == 'section':
            slide = add(L_SECTION, BG_SECTION)
            for ph in slide.placeholders:
                idx = ph.placeholder_format.idx
                if idx == 0:
                    ph.text = title
                elif idx == 1 and body_lines:
                    fill_placeholder(ph, body_lines)
                    for para in ph.text_frame.paragraphs:
                        for run in para.runs:
                            run.font.color.rgb = RGBColor(0x00, 0x00, 0x00)
            continue

        slide = add(L_CONTENT, BG_CONTENT)
        non_tbl, tbl_rows = parse_md_table(body_lines)
        # Blank lines around a table are not text: only real content should
        # push the table down into the lower part of the placeholder.
        has_text = any(line.strip() for line in non_tbl)

        for ph in slide.placeholders:
            idx = ph.placeholder_format.idx
            if idx == 0:
                ph.text = title
            elif idx == 1:
                if not tbl_rows:
                    fill_placeholder(ph, body_lines)
                    continue

                # Place the table over the placeholder's own area.
                left, top, width, height = _placeholder_box(ph)
                if has_text:
                    # Text keeps the upper part; the table gets the rest.
                    fill_placeholder(ph, non_tbl)
                    t_offset = int(height * 0.45)
                    add_pptx_table(slide, tbl_rows, left, top + t_offset,
                                   width, height - t_offset)
                else:
                    ph.text = ''  # leave the placeholder empty
                    add_pptx_table(slide, tbl_rows, left, top, width, height)

    prs.save(OUTPUT)
    print(f"OK: {OUTPUT} — {len(prs.slides)} slides.")

# Build the deck only when this file is run as a script, not when imported.
if __name__ == '__main__':
    build()