Step 1: Implement the GithubRepoRunner Component

The GithubRepoRunner Component clones a repo, runs a specific script with the provided arguments, and collects the logs.

Let’s dive into how to develop the component, starting with the following code:

import io
import os
import subprocess
import sys
from copy import deepcopy
from functools import partial
from subprocess import Popen
from typing import Dict, List, Optional

from lightning import BuildConfig, CloudCompute, LightningApp, LightningFlow
from lightning.app import structures
from lightning.app.components import TracerPythonScript
from lightning.app.frontend import StreamlitFrontend
from lightning.app.storage.path import Path
from lightning.app.utilities.state import AppState


class GithubRepoRunner(TracerPythonScript):
    def __init__(
        self,
        id: str,
        github_repo: str,
        script_path: str,
        script_args: List[str],
        requirements: List[str],
        cloud_compute: Optional[CloudCompute] = None,
        **kwargs,
    ):
        """Set up a component that clones a repo, runs one of its scripts with the given arguments and collects
        the logs.

        Arguments:
            id: Identifier of the component.
            github_repo: The GitHub repo URL to clone.
            script_path: The path to the script to execute.
            script_args: The arguments to be provided to the script.
            requirements: The Python requirements to run the script.
            cloud_compute: The object to select the cloud instance.
            **kwargs: Any extra keyword arguments, forwarded unchanged to the parent constructor.

        """
        # The requirements reach the parent class wrapped in a BuildConfig.
        build_config = BuildConfig(requirements=requirements)
        super().__init__(
            script_path=script_path,
            script_args=script_args,
            cloud_compute=cloud_compute,
            cloud_build_config=build_config,
            **kwargs,
        )

        # Attributes specific to this component; `logs` starts out empty.
        self.id = id
        self.github_repo = github_repo
        self.logs = []

    def run(self, *args, **kwargs):
        """Clone ``self.github_repo``, run the configured script inside it and store what it printed.

        The repository is cloned next to this file, the process working directory is
        switched to the clone, and the parent ``run`` is executed with ``sys.stdout``
        redirected to an in-memory buffer. The captured text is stored in ``self.logs``.

        Arguments:
            *args: Positional arguments forwarded to the parent ``run`` method.
            **kwargs: Keyword arguments forwarded to the parent ``run`` method.

        Raises:
            OSError: If the clone directory does not exist when changing into it
                (for example because ``git clone`` failed).

        """
        # Local import so this method does not depend on a new file-level import.
        import contextlib

        # 1: Work out the directory name `git clone` will create. Only a trailing
        # ".git" is removed (a blanket replace would also mangle names such as
        # "user.github.io"), and a trailing "/" on the URL is ignored.
        repo_name = self.github_repo.rstrip("/").split("/")[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[: -len(".git")]
        cwd = os.path.dirname(__file__)

        # 2. Hack: Patch stdout so we can capture the logs. The context managers
        # restore the previous stdout and close the buffer even if the script raises,
        # so later prints in this process keep working.
        with io.StringIO() as string_io, contextlib.redirect_stdout(string_io):
            # 3: Use git command line to clone the repo. An argument list (no shell)
            # keeps the URL from being interpreted by a shell. The exit status is
            # deliberately not checked, e.g. the clone may already exist from an
            # earlier run.
            subprocess.run(["git", "clone", self.github_repo], cwd=cwd)

            # 4: Execute the parent run method of the TracerPythonScript class.
            os.chdir(os.path.join(cwd, repo_name))
            super().run(*args, **kwargs)

            # 5: Get all the collected logs and add them to the state.
            # This isn't optimal as heavy, but works for this demo purpose.
            self.logs = string_io.getvalue()

    def configure_layout(self):

Tutorial