Skip to content

Synch Nextcloud with cluster

list_local_files()

Non-recursive listing of files in LOCAL_DIR (files only).

Source code in src/kibad_llm/data_integration/synch_nextcloud_with_cluster.py
102
103
104
def list_local_files():
    """Return the names of files located directly inside LOCAL_DIR.

    Only entries for which ``os.path.isfile`` is true are kept, so
    subdirectories are neither returned nor descended into.
    """
    file_names = []
    for entry in os.listdir(LOCAL_DIR):
        candidate = os.path.join(LOCAL_DIR, entry)
        if os.path.isfile(candidate):
            file_names.append(entry)
    return file_names

list_nextcloud_files()

Returns a list of filenames that are direct (non-directory) children of the public share root. Uses PROPFIND with Depth: 1 and parses XML.

Source code in src/kibad_llm/data_integration/synch_nextcloud_with_cluster.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def list_nextcloud_files():
    """
    Returns a list of filenames that are direct (non-directory) children
    of the public share root. Uses PROPFIND with Depth: 1 and parses XML.

    Returns:
        A list of percent-decoded filenames, one for every ``<d:response>``
        entry that is neither the requested folder itself nor a collection.

    Raises:
        RuntimeError: If the server answers with a status other than 207 or
            200, or if the response body cannot be parsed as XML.
    """
    headers = {"Depth": "1", "Content-Type": 'application/xml; charset="utf-8"'}

    resp = requests.request(
        "PROPFIND",
        NEXTCLOUD_WEBDAV_URL,
        data=PROPFIND_BODY.encode("utf-8"),
        headers=headers,
        auth=AUTH,
        timeout=30,
    )
    if resp.status_code not in (207, 200):
        raise RuntimeError(f"PROPFIND failed: {resp.status_code} {resp.text[:500]}")

    try:
        root = ET.fromstring(resp.content)
    except ET.ParseError as e:
        # Chain explicitly so the traceback marks the parse error as the cause.
        raise RuntimeError(
            f"Failed to parse PROPFIND XML: {e}\nResponse (truncated): {resp.text[:1000]}"
        ) from e

    # Canonical (percent-decoded, no trailing slash) path of the requested
    # folder, so the entry describing the folder itself can be skipped even
    # when the server encodes its href differently from the configured URL.
    requested_path = unquote(urlparse(NEXTCLOUD_WEBDAV_URL).path.rstrip("/"))

    files = []
    # Iterate over all <d:response> entries
    for response_elem in root.findall(".//{DAV:}response"):
        # Strip surrounding whitespace: pretty-printed XML may pad the href,
        # and that padding must not end up in the filename.
        href_text = (response_elem.findtext("{DAV:}href") or "").strip()
        if not href_text:
            continue
        href_path = urlparse(href_text).path.rstrip("/")  # path part only

        # skip the entry for the folder itself
        if unquote(href_path) == requested_path:
            continue

        # skip directories: a collection carries <d:resourcetype><d:collection/>
        resourcetype = response_elem.find(".//{DAV:}resourcetype")
        if resourcetype is not None and resourcetype.find("{DAV:}collection") is not None:
            continue

        # filename is the last path segment; split before decoding so an
        # encoded "/" inside a segment is not mistaken for a separator
        name = href_path.split("/")[-1]
        if not name:
            continue
        files.append(unquote(name))

    return files