clone repo and upload to instance storage
One script reply has been approved by the moderators Verified

Clones a GitHub repo defined in a git_repository resource, then uploads its files to the instance storage so they can be cached

Created by hugo697 205 days ago Picked 3 times
Submitted by hugo697 Bun
Verified 8 days ago
1
import * as wmillclient from "windmill-client";
2
import { basename, join } from "node:path";
3
import { existsSync, rmSync } from "fs";
4
import process from "process";
5
import { spawn } from 'child_process';
6
import * as fs_async from 'fs/promises';
7
import * as fs from 'node:fs';
8

9
// Upper bound on the number of upload workers that uploadDirectoryToS3 runs in parallel.
const UPLOAD_CONCURRENCY = 16;
10
// Name of the marker object written under the upload base path after all file uploads have finished.
const CLONE_MARKER_FILE = ".windmill_clone_complete";
11

12
/**
 * Shape of the `git_repository` resource consumed by this script.
 */
type GitRepository = {
  /**
   * Clone URL. May contain an `AZURE_DEVOPS_TOKEN(<resource path>)` placeholder,
   * which the clone helpers replace with a freshly fetched access token.
   */
  url: string;
  /** Branch to clone; an empty value means no branch is passed to git. */
  branch: string;
  /** Sub-folder to sparse-checkout when cloning the latest revision; empty for none. */
  folder: string;
  /** Not read by this script, hence `unknown` rather than `any`. */
  gpg_key: unknown;
  /** When true, a GitHub App installation token is fetched and embedded in `url`. */
  is_github_app: boolean;
};
19

20
/**
 * Clones the repository described by a `git_repository` resource and uploads
 * its working tree (without `.git`) to `gitrepos/<workspace>/<resource_path>/<commit>`.
 *
 * @param resource_path - Path of the `git_repository` resource to read.
 * @param workspace - Workspace used for the upload path and the upload calls.
 * @param git_ssh_identity - Optional variable paths holding SSH private keys;
 *   when given, `GIT_SSH_COMMAND` is set so git uses them.
 * @param commit - Optional commit to fetch; when omitted the latest revision is cloned.
 * @returns Summary with the upload path, the resolved commit hash and the file count.
 * @throws Re-throws any error from cloning or uploading after logging it.
 */
export async function main(
  resource_path: string,
  workspace: string,
  git_ssh_identity?: string[],
  commit?: string
) {
  // Captured before the try block so the cleanup in `finally` can use it.
  const cwd = process.cwd();
  let clonedRepoPath: string | undefined;

  try {
    console.log("Starting git clone and Blob storage upload process");

    // Get the git repository resource
    const repo_resource: GitRepository = await wmillclient.getResource(resource_path);

    if (git_ssh_identity) {
      process.env.GIT_SSH_COMMAND = await get_git_ssh_cmd(cwd, git_ssh_identity);
    }

    // Handle GitHub App authentication if needed
    if (repo_resource.is_github_app) {
      const token = await get_gh_app_token();
      repo_resource.url = prependTokenToGitHubUrl(repo_resource.url, token);
    }

    process.env["HOME"] = ".";
    // Never let git block waiting for interactive credentials.
    process.env.GIT_TERMINAL_PROMPT = "0";

    // Clone the repository
    const { repo_name, commitHash } = await git_clone(cwd, repo_resource, commit);
    clonedRepoPath = join(cwd, repo_name);

    // Remove .git directory to avoid uploading git history
    const gitDir = join(clonedRepoPath, ".git");
    if (existsSync(gitDir)) {
      rmSync(gitDir, { recursive: true, force: true });
      console.log("Removed .git directory");
    }

    // Upload to S3
    const s3Path = `gitrepos/${workspace}/${resource_path}/${commitHash}`;
    const fileCount = await uploadDirectoryToS3(clonedRepoPath, s3Path, workspace);

    return {
      success: true,
      message: "Repository cloned and uploaded to S3 successfully",
      s3_path: s3Path,
      commit_hash: commitHash,
      file_count: fileCount,
    };
  } catch (error) {
    console.error("Error in git clone and upload:", error);
    throw error;
  } finally {
    // Clean up cloned repository
    if (clonedRepoPath && existsSync(clonedRepoPath)) {
      rmSync(clonedRepoPath, { recursive: true, force: true });
      console.log("Cleaned up cloned repository");
    }
    // Do not leave SSH private keys on disk: get_git_ssh_cmd writes them to
    // `<cwd>/ssh_id_priv_<index>`. `force` ignores keys that were never written.
    git_ssh_identity?.forEach((_, i) => {
      rmSync(join(cwd, `ssh_id_priv_${i}`), { force: true });
    });
  }
}
83

84
/**
 * Materialises SSH private keys from variables and builds the value for
 * `GIT_SSH_COMMAND`.
 *
 * Each variable is written to `<cwd>/ssh_id_priv_<index>` and passed to ssh
 * with `-i`. A variable that cannot be fetched or written is logged and
 * skipped (best effort), so the resulting command may contain fewer
 * identities than requested.
 *
 * NOTE(review): host key verification is disabled (`StrictHostKeyChecking=no`);
 * kept as-is because callers depend on it, but it permits MITM on first connect.
 *
 * @param cwd - Directory in which the key files are created.
 * @param git_ssh_identity - Variable paths holding the private keys.
 * @returns The ssh command line, e.g. `ssh -o StrictHostKeyChecking=no -i '<path>'`.
 */
async function get_git_ssh_cmd(cwd: string, git_ssh_identity: string[]): Promise<string> {
  const identityFlags = await Promise.all(
    git_ssh_identity.map(async (varPath, i) => {
      const filePath = join(cwd, `./ssh_id_priv_${i}`);

      try {
        // Get variable value using windmill; key files need a trailing newline.
        const content = `${await wmillclient.getVariable(varPath)}\n`;

        // Create the file as owner-only from the start so the key is never
        // readable by others, not even between the write and the chmod.
        await fs_async.writeFile(filePath, content, { encoding: 'utf8', mode: 0o600 });

        // `mode` only applies to newly created files; enforce it for a
        // pre-existing file as well.
        await fs_async.chmod(filePath, 0o600);

        // Escape single quotes for shell command
        const escapedPath = filePath.replace(/'/g, "'\\''");
        return ` -i '${escapedPath}'`;
      } catch (error) {
        console.error(
          `Variable ${varPath} not found for git ssh identity: ${error}`
        );
        return '';
      }
    })
  );

  return `ssh -o StrictHostKeyChecking=no${identityFlags.join('')}`;
}
115

116
/**
 * Clones the repository either at a specific commit or at its latest revision.
 *
 * @param cwd - Directory under which the repository is cloned.
 * @param repo_resource - Repository description (URL, branch, folder).
 * @param commit - When non-empty, this commit is fetched; otherwise the latest
 *   revision is cloned.
 * @returns The local repository directory name and the checked-out commit hash.
 */
async function git_clone(
  cwd: string,
  repo_resource: GitRepository,
  commit?: string,
): Promise<{ repo_name: string; commitHash: string }> {
  return commit
    ? git_clone_at_commit(cwd, repo_resource, commit)
    : git_clone_at_latest(cwd, repo_resource);
}
127

128
/**
 * Fetches exactly one commit of the repository into `<cwd>/<repo_name>` and
 * checks it out (init + remote add + shallow fetch + checkout FETCH_HEAD).
 *
 * The process working directory is changed to the repository while git runs
 * and is always restored to `cwd`, including when a git command fails.
 *
 * @param cwd - Directory under which the repository directory is created.
 * @param repo_resource - Repository description; `folder` is not used on this path.
 * @param commit - Commit (or any ref git can fetch) to check out.
 * @returns The repository directory name and the hash reported by `git rev-parse HEAD`.
 * @throws If the Azure DevOps token request fails or any git command fails.
 */
async function git_clone_at_commit(
  cwd: string,
  repo_resource: GitRepository,
  commit: string,
): Promise<{ repo_name: string; commitHash: string }> {
  let repo_url = repo_resource.url;
  const branch = repo_resource.branch ?? "";
  const repo_name = basename(repo_url, ".git");

  // Handle Azure DevOps token if needed
  const azureMatch = repo_url.match(/AZURE_DEVOPS_TOKEN\((?<url>.+)\)/);
  if (azureMatch) {
    console.log("Fetching Azure DevOps access token...");
    // The named group is always present when the pattern matched.
    const azureResource = await wmillclient.getResource(azureMatch.groups?.url ?? "");
    const response = await fetch(
      `https://login.microsoftonline.com/${azureResource.azureTenantId}/oauth2/token`,
      {
        method: "POST",
        body: new URLSearchParams({
          client_id: azureResource.azureClientId,
          client_secret: azureResource.azureClientSecret,
          grant_type: "client_credentials",
          resource: "499b84ac-1321-427f-aa17-267ca6975798/.default",
        }),
      }
    );
    if (!response.ok) {
      throw new Error(
        `Azure DevOps token request failed (${response.status} ${response.statusText})`
      );
    }
    const { access_token } = (await response.json()) as { access_token?: unknown };
    if (typeof access_token !== "string" || access_token === "") {
      // Without this the literal text "undefined" would be spliced into the URL.
      throw new Error("Azure DevOps token response did not contain an access_token");
    }
    const placeholder = azureMatch[0];
    // Function replacer: the token is inserted verbatim, never parsed for `$` patterns.
    repo_url = repo_url.replace(placeholder, () => access_token);
  }

  const repoPath = join(cwd, repo_name);
  await fs_async.mkdir(repoPath, { recursive: true });

  process.chdir(repoPath);
  try {
    const initArgs = ["init", "--quiet"];
    if (branch) {
      initArgs.push(`--initial-branch=${branch}`);
    }
    await runCommand(undefined, "git", ...initArgs);

    // Index 3 is the URL, which may embed credentials and must be masked in
    // logs (index 0 would mask the word "remote" and leak the URL).
    await runCommand(3, "git", "remote", "add", "origin", repo_url);

    await runCommand(undefined, "git", "fetch", "--depth=1", "--quiet", "origin", commit);

    await runCommand(undefined, "git", "checkout", "--quiet", "FETCH_HEAD");

    const commitHash = (await runCommand(undefined, "git", "rev-parse", "HEAD")).trim();

    return { repo_name, commitHash };
  } finally {
    // Return to original directory, even if a git command failed.
    process.chdir(cwd);
  }
}
182

183
/**
 * Shallow-clones the latest revision of the repository into `<repo_name>`
 * (relative to the process working directory) and reports the checked-out
 * commit. When `folder` is set, the clone is sparse and only that sub-folder
 * is added to the checkout.
 *
 * The process working directory is changed while git runs and is always
 * restored to `cwd`, including when a step after the clone fails.
 *
 * @param cwd - Directory the clone is expected to land in; restored afterwards.
 * @param repo_resource - Repository description (URL, optional branch and folder).
 * @returns The repository directory name and the hash reported by `git rev-parse HEAD`.
 * @throws If the Azure DevOps token request fails, a git command fails, or the
 *   requested sub-folder does not exist after sparse checkout.
 */
async function git_clone_at_latest(
  cwd: string,
  repo_resource: GitRepository
): Promise<{ repo_name: string; commitHash: string }> {
  let repo_url = repo_resource.url;
  const subfolder = repo_resource.folder ?? "";
  const branch = repo_resource.branch ?? "";
  const repo_name = basename(repo_url, ".git");

  // Handle Azure DevOps token if needed
  const azureMatch = repo_url.match(/AZURE_DEVOPS_TOKEN\((?<url>.+)\)/);
  if (azureMatch) {
    console.log("Fetching Azure DevOps access token...");
    // The named group is always present when the pattern matched.
    const azureResource = await wmillclient.getResource(azureMatch.groups?.url ?? "");
    const response = await fetch(
      `https://login.microsoftonline.com/${azureResource.azureTenantId}/oauth2/token`,
      {
        method: "POST",
        body: new URLSearchParams({
          client_id: azureResource.azureClientId,
          client_secret: azureResource.azureClientSecret,
          grant_type: "client_credentials",
          resource: "499b84ac-1321-427f-aa17-267ca6975798/.default",
        }),
      }
    );
    if (!response.ok) {
      throw new Error(
        `Azure DevOps token request failed (${response.status} ${response.statusText})`
      );
    }
    const { access_token } = (await response.json()) as { access_token?: unknown };
    if (typeof access_token !== "string" || access_token === "") {
      // Without this the literal text "undefined" would be spliced into the URL.
      throw new Error("Azure DevOps token response did not contain an access_token");
    }
    const placeholder = azureMatch[0];
    // Function replacer: the token is inserted verbatim, never parsed for `$` patterns.
    repo_url = repo_url.replace(placeholder, () => access_token);
  }

  const cloneArgs = ["clone", "--quiet", "--depth", "1"];
  if (subfolder !== "") cloneArgs.push("--sparse");
  if (branch !== "") cloneArgs.push("--branch", branch);
  cloneArgs.push(repo_url, repo_name);

  // runCommand resolves a negative position as `args.length - 1 + position`,
  // so -1 masks the second-to-last argument: the (possibly credentialed) URL.
  await runCommand(-1, "git", ...cloneArgs);

  const fullPath = join(cwd, repo_name);
  process.chdir(fullPath);
  try {
    if (subfolder !== "") {
      await runCommand(undefined, "git", "sparse-checkout", "add", subfolder);
      const subfolderPath = join(fullPath, subfolder);

      if (!existsSync(subfolderPath)) {
        throw new Error(`Subfolder ${subfolder} does not exist.`);
      }

      process.chdir(subfolderPath);
    }

    // Get the commit hash
    const commitHash = (await runCommand(undefined, "git", "rev-parse", "HEAD")).trim();

    return { repo_name, commitHash };
  } finally {
    // Return to original directory, even if a step above failed.
    process.chdir(cwd);
  }
}
242

243
/**
 * Uploads every regular file under `directoryPath` to `<s3BasePath>/<relative path>`
 * using up to `UPLOAD_CONCURRENCY` parallel workers, then writes a completion
 * marker (`CLONE_MARKER_FILE`) as the final upload.
 *
 * Entries that are neither directories nor regular files are skipped. After
 * the first failed upload the remaining workers stop picking up new files and
 * the marker is not written.
 *
 * @param directoryPath - Local directory to upload.
 * @param s3BasePath - Key prefix under which the files are stored.
 * @param workspace - Workspace passed to the upload endpoint.
 * @returns Number of files uploaded (the marker is not counted).
 * @throws The first read or upload error encountered.
 */
async function uploadDirectoryToS3(
  directoryPath: string,
  s3BasePath: string,
  workspace: string,
): Promise<number> {
  console.log(`Uploading ${directoryPath} -> ${s3BasePath}`);

  // Walk once into a flat task list so we can drive a bounded-concurrency pool.
  const tasks: { localPath: string; s3Key: string }[] = [];
  const walk = (dir: string, s3Path: string): void => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const fullPath = join(dir, entry.name);
      const s3Key = s3Path ? `${s3Path}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        walk(fullPath, s3Key);
      } else if (entry.isFile()) {
        tasks.push({ localPath: fullPath, s3Key });
      }
    }
  };
  walk(directoryPath, s3BasePath);
  console.log(`Discovered ${tasks.length} files to upload`);

  let nextIndex = 0;
  let uploaded = 0;
  let lastReport = 0;
  // Set by the first failing worker so the others stop instead of uploading
  // into a run that has already failed.
  let aborted = false;

  const worker = async (): Promise<void> => {
    while (!aborted) {
      const task = tasks[nextIndex++];
      if (task === undefined) return;
      try {
        // Async read keeps the event loop free so workers really overlap.
        const fileContent = await fs_async.readFile(task.localPath);
        const blob = new Blob([fileContent], { type: 'application/octet-stream' });
        await wmillclient.HelpersService.gitRepoViewerFileUpload({
          workspace,
          fileKey: task.s3Key,
          requestBody: blob,
        });
      } catch (error) {
        aborted = true;
        throw error;
      }
      uploaded++;
      if (uploaded - lastReport >= 25 || uploaded === tasks.length) {
        lastReport = uploaded;
        console.log(`Uploaded ${uploaded} / ${tasks.length} files`);
      }
    }
  };

  await Promise.all(
    Array.from({ length: Math.min(UPLOAD_CONCURRENCY, tasks.length) }, () => worker())
  );

  // Marker is the LAST write — its presence is what the viewer checks for.
  const markerKey = `${s3BasePath}/${CLONE_MARKER_FILE}`;
  const markerBody = JSON.stringify({
    completed_at: new Date().toISOString(),
    file_count: tasks.length,
  });
  await wmillclient.HelpersService.gitRepoViewerFileUpload({
    workspace,
    fileKey: markerKey,
    requestBody: new Blob([markerBody], { type: 'application/json' }),
  });
  console.log(`Wrote completion marker: ${markerKey}`);

  return tasks.length;
}
307

308
/**
 * Spawns `cmd` with `args` (no shell) and resolves with its full stdout.
 *
 * One argument can be declared secret; it is replaced by `***` in the logged
 * command line, in logged stdout/stderr, and in every error message. The
 * resolved stdout itself is returned unmodified.
 *
 * @param secret_position - Index into `args` of the secret argument, or
 *   `undefined` for none. A negative value is resolved as
 *   `args.length - 1 + secret_position` (so `-1` is the second-to-last
 *   argument). Positions outside `args` are ignored.
 * @param cmd - Executable to run.
 * @param args - Arguments passed to the executable.
 * @returns The process's stdout, decoded as UTF-8.
 * @throws (rejects) If the process cannot be spawned or exits with a non-zero code.
 */
function runCommand(secret_position: number | undefined, cmd: string, ...args: string[]): Promise<string> {
  const displayArgs = args.slice();
  let secret: string | undefined;
  if (secret_position != undefined) {
    const index = secret_position < 0 ? args.length - 1 + secret_position : secret_position;
    if (index >= 0 && index < args.length) {
      secret = args[index];
      displayArgs[index] = "***";
    }
  }

  // Replaces every occurrence of the secret; an empty secret is left alone
  // because splitting on "" would mangle the whole text.
  const redact = (text: string): string =>
    secret ? text.split(secret).join("***") : text;

  console.log(`Running shell command: '${cmd} ${displayArgs.join(" ")} ...'`);

  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args);

    let stdout = '';
    let stderr = '';

    // Decode at the stream level so multi-byte characters split across
    // chunks are not corrupted.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    child.stdout.on('data', (chunk: string) => {
      stdout += chunk;
    });

    child.stderr.on('data', (chunk: string) => {
      stderr += chunk;
    });

    child.on('error', (error) => {
      const errorString = redact(error.toString());
      console.log(`Shell command FAILED: ${cmd}`, errorString);
      reject(
        new Error(`SH command '${cmd} ${displayArgs.join(" ")}' failed: ${errorString}`)
      );
    });

    child.on('close', (code) => {
      // Tools such as git may echo the URL (and its credentials) back.
      const safeStderr = redact(stderr);
      if (stdout.length > 0) {
        console.log("Shell stdout:", redact(stdout));
      }
      if (safeStderr.length > 0) {
        console.log("Shell stderr:", safeStderr);
      }
      if (code === 0) {
        console.log(`Shell command completed successfully: ${cmd}`);
        resolve(stdout);
      } else {
        reject(new Error(`Command failed with code ${code}: ${safeStderr}`));
      }
    });
  });
}
360

361
/**
 * Requests a GitHub App installation token from the Windmill API for the
 * current workspace, authenticating with the job token.
 *
 * The API base URL is taken from `BASE_INTERNAL_URL`, then `BASE_URL`, then
 * defaults to `http://localhost:8000`.
 *
 * @returns The installation token.
 * @throws If `WM_WORKSPACE` or `WM_TOKEN` is unset, if the API answers with a
 *   non-2xx status, or if the response carries no token.
 */
async function get_gh_app_token(): Promise<string> {
  const workspace = process.env["WM_WORKSPACE"];
  const jobToken = process.env["WM_TOKEN"];
  if (!workspace || !jobToken) {
    // Fail early instead of calling `/api/w/undefined/...` with `Bearer undefined`.
    throw new Error("WM_WORKSPACE and WM_TOKEN must be set to request a GitHub App token");
  }

  const baseUrl =
    process.env["BASE_INTERNAL_URL"] ??
    process.env["BASE_URL"] ??
    "http://localhost:8000";
  const url = `${baseUrl}/api/w/${workspace}/github_app/token`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${jobToken}`,
    },
    body: JSON.stringify({ job_token: jobToken }),
  });

  if (!response.ok) {
    const errorBody = await response.text().catch(() => "");
    throw new Error(`GitHub App token error (${response.status}): ${errorBody || response.statusText}`);
  }

  const data = (await response.json()) as { token?: unknown };
  if (typeof data.token !== "string" || data.token === "") {
    throw new Error("GitHub App token response did not contain a token");
  }
  return data.token;
}
386

387
/**
 * Builds an HTTPS clone URL that authenticates with a GitHub App installation
 * token (`x-access-token:<token>@host/path`).
 *
 * The scheme is always forced to `https`, and any credentials, query string
 * or fragment in the input are dropped. A non-default port is preserved so
 * GitHub Enterprise hosts on custom ports keep working.
 *
 * @param gitHubUrl - Absolute repository URL.
 * @param installationToken - Token to embed as the password.
 * @returns The credentialed HTTPS URL.
 * @throws TypeError if `gitHubUrl` is not a valid absolute URL.
 */
function prependTokenToGitHubUrl(gitHubUrl: string, installationToken: string): string {
  const url = new URL(gitHubUrl);
  // `host` (unlike `hostname`) keeps an explicit non-default port.
  return `https://x-access-token:${installationToken}@${url.host}${url.pathname}`;
}
391