Synch Nextcloud with cluster

`list_local_files()`

Non-recursive listing of files in LOCAL_DIR (files only).

Source code in src/kibad_llm/data_integration/synch_nextcloud_with_cluster.py

def list_local_files():
    """Return the names of the files located directly inside LOCAL_DIR.

    The listing is not recursive: subdirectories are not descended into, and
    any entry for which ``os.path.isfile`` is false is left out.
    """
    file_names = []
    for entry in os.listdir(LOCAL_DIR):
        full_path = os.path.join(LOCAL_DIR, entry)
        if os.path.isfile(full_path):
            file_names.append(entry)
    return file_names

`list_nextcloud_files()`

Returns a list of filenames that are direct (non-directory) children of the public share root. Uses PROPFIND with Depth: 1 and parses XML.

Source code in src/kibad_llm/data_integration/synch_nextcloud_with_cluster.py

def list_nextcloud_files():
    """
    Return the filenames of the direct (non-directory) children of the
    public share root.

    Sends a WebDAV PROPFIND request with ``Depth: 1`` to NEXTCLOUD_WEBDAV_URL
    and parses the XML response.

    Returns:
        A list of percent-decoded filenames (the last path segment of each
        ``<d:href>``), in the order the entries appear in the response.

    Raises:
        RuntimeError: If the response status is neither 207 nor 200, or if
            the response body cannot be parsed as XML.

    Exceptions raised by ``requests`` itself (e.g. on timeout) are not caught
    here and propagate to the caller.
    """
    headers = {"Depth": "1", "Content-Type": 'application/xml; charset="utf-8"'}

    resp = requests.request(
        "PROPFIND",
        NEXTCLOUD_WEBDAV_URL,
        data=PROPFIND_BODY.encode("utf-8"),
        headers=headers,
        auth=AUTH,
        timeout=30,
    )
    if resp.status_code not in (207, 200):
        raise RuntimeError(f"PROPFIND failed: {resp.status_code} {resp.text[:500]}")

    try:
        root = ET.fromstring(resp.content)
    except ET.ParseError as e:
        # Chain the original parse error so its traceback is kept as the cause.
        raise RuntimeError(
            f"Failed to parse PROPFIND XML: {e}\nResponse (truncated): {resp.text[:1000]}"
        ) from e

    # Canonical (percent-decoded, no trailing slash) path of the requested
    # folder, used to skip the response entry that describes the folder itself.
    # Decoding both sides keeps the comparison stable when the configured URL
    # and the hrefs in the response use different percent-encoding.
    requested_path = unquote(urlparse(NEXTCLOUD_WEBDAV_URL).path.rstrip("/"))

    files = []
    # Iterate over all <d:response> entries
    for response_elem in root.findall(".//{DAV:}response"):
        # findtext yields None when <d:href> is missing and "" when it is empty.
        href_text = (response_elem.findtext("{DAV:}href") or "").strip()
        if not href_text:
            continue

        # Path part only; hrefs may be absolute URLs or absolute paths.
        href_path = urlparse(href_text).path.rstrip("/")

        # Skip the entry for the folder itself.
        if unquote(href_path) == requested_path:
            continue

        # Skip directories: an entry is a collection when its <d:resourcetype>
        # contains a <d:collection> child.
        resourcetype = response_elem.find(".//{DAV:}resourcetype")
        if resourcetype is not None and resourcetype.find("{DAV:}collection") is not None:
            continue

        # Filename is the last path segment. Split before decoding so that an
        # encoded "/" (%2F) inside a name is not mistaken for a separator.
        name = href_path.rsplit("/", 1)[-1]
        if not name:
            continue
        files.append(unquote(name))

    return files