Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: microsoft/SynapseML
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: microsoft/SynapseML
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: brwals/build-container
Choose a head ref
Can’t automatically merge. Don’t worry, you can still create the pull request.
  • 12 commits
  • 15 files changed
  • 1 contributor

Commits on May 16, 2024

  1. Copy the full SHA
    046c9dc View commit details
  2. whitespace

    BrendanWalsh committed May 16, 2024
    Copy the full SHA
    d87c6b2 View commit details
  3. added dockerfile

    BrendanWalsh committed May 16, 2024
    Copy the full SHA
    6ca49ad View commit details
  4. cleaned up Dockerfile

    BrendanWalsh committed May 16, 2024
    Copy the full SHA
    bc46f50 View commit details
  5. Copy the full SHA
    7ff6fcb View commit details

Commits on Jun 4, 2024

  1. Copy the full SHA
    cddb40c View commit details

Commits on Jun 5, 2024

  1. Copy the full SHA
    c37643c View commit details
  2. Copy the full SHA
    36735b1 View commit details

Commits on Jun 6, 2024

  1. Copy the full SHA
    09316ab View commit details
  2. add offline dockerfile

    BrendanWalsh committed Jun 6, 2024
    Copy the full SHA
    44211e4 View commit details
  3. update

    BrendanWalsh committed Jun 6, 2024
    Copy the full SHA
    fed5005 View commit details

Commits on Jun 17, 2024

  1. update

    BrendanWalsh committed Jun 17, 2024
    Copy the full SHA
    b65547d View commit details
19 changes: 19 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"image": "synapseml-host",
"remoteUser": "root",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.black-formatter",
"GitHub.copilot",
"scala-lang.scala",
"itryapitsin.Sbt",
"scalameta.metals"
]
}
},
"mounts": [
"source=${localEnv:HOME}${localEnv:USERPROFILE}/.azure,target=/home/dev/.azure/,type=bind,consistency=cached"
]
}
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -21,7 +21,6 @@
*.iml

# Generic editors
.vscode
.idea
.metals
.bloop
@@ -88,3 +87,5 @@ metastore_db/
**/dist/*
**/*.egg-info/*

# build container
!tools/docker/build/*
12 changes: 12 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"recommendations": [
"ms-python.python",
"ms-python.black-formatter",
"GitHub.copilot",
"scala-lang.scala",
"itryapitsin.Sbt",
"scalameta.metals"
],
"unwantedRecommendations": [
]
}
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"files.watcherExclude": {
"**/target": true
},
"azure-pipelines.1ESPipelineTemplatesSchemaFile": true
}
8 changes: 8 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -343,6 +343,14 @@ lazy val root = (project in file("."))
ThisBuild / useCoursier := false
))

// Aggregate task: runs `compile` in the Compile configuration of each listed
// project (root, core, deepLearning, cognitive, vw, lightgbm, opencv).
val compileAll = TaskKey[Unit]("compileAll", "compile all projects for publishing")
compileAll := {
// ScopeFilter picks the (project, configuration) pairs; `.all(...)` evaluates
// `compile` in every selected scope. The results are discarded (task type is Unit).
compile.all(ScopeFilter(
inProjects(root, core, deepLearning, cognitive, vw, lightgbm, opencv),
inConfigurations(Compile))
).value
}

val setupTask = TaskKey[Unit]("setup", "set up library for intellij")
setupTask := {
compile.all(ScopeFilter(
77 changes: 40 additions & 37 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
name: synapseml
channels:
- conda-forge
- default
dependencies:
- python=3.11.8
- requests=2.26.0
@@ -10,44 +9,48 @@ dependencies:
- r-sparklyr=1.8.1
- r-devtools=2.4.2
- pip:
- pyarrow>=0.15.0
- azure-cli==2.60.0
- azure-storage-blob==12.20.0
- black[jupyter]==22.3.0
- black==22.3.0
- chardet==5.2.0
- conda-pack==0.7.1
- coverage==7.5.1
- flake8==7.0.0
- horovod==0.28.1
- huggingface-hub==0.23.0
- ipython==8.24.0
- jupyter==1.0.0
- langchain==0.0.152
- markdownify==0.12.1
- matplotlib==3.9.0
- mistletoe==1.3.0
- mlflow==2.12.2
- nbconvert==7.16.4
- nbformat==5.10.4
- numpy==1.26.4
- onnxmltools==1.7.0
- openai==0.27.5
- opencv-python==4.9.0.80
- pandas==1.5.0
- petastorm==0.12.1
- Pillow==10.3.0
- pyarrow==15.0.2
- PyGithub==1.59.1
- pypandoc==1.13
- pyspark==3.4.1
- pandas==1.4.0
- wheel
- sphinx==5.0.2
- pytest-codeblocks==0.17.0
- pytest-cov==5.0.0
- pytest==8.2.0
- pytorch_lightning==1.5.0
- PyYAML==6.0.1
- sphinx_paramlinks==0.5.2
- sphinx_rtd_theme
- coverage
- pytest
- pytest-cov
- nbconvert
- nbformat
- pyyaml
- PyGithub
- tqdm
- ipython
- pytest-codeblocks
- azure-storage-blob
- twine
- jupyter
- mlflow
- numpy
- sphinx_rtd_theme==2.0.0
- sphinx==5.0.2
- torch==2.0.0
- torchvision==0.15.1
- horovod==0.28.1
- petastorm>=0.11.0
- pytorch_lightning==1.5.0
- onnxmltools==1.7.0
- matplotlib
- Pillow
- tqdm==4.66.4
- traitlets==5.14.3
- transformers==4.32.1
- huggingface-hub>=0.8.1
- langchain==0.0.152
- openai==0.27.5
- black==22.3.0
- black[jupyter]==22.3.0
- mistletoe
- pypandoc
- markdownify
- traitlets
- opencv-python
- twine==5.0.0
- wheel==0.43.0
2 changes: 2 additions & 0 deletions project/Secrets.scala
Original file line number Diff line number Diff line change
@@ -175,6 +175,7 @@ object Secrets {
lazy val pgpPublicFile: File = getPgpSecretFile(PgpPublicSecretName, PgpPublicEnvVarName)

// Publish-target flags, each read from the environment variable whose name is
// held in PublishToFeed / PublishToVHD. An unset variable means false.
// `toBoolean` throws IllegalArgumentException for any value other than
// "true"/"false"; being lazy vals, that only happens on first access.
lazy val publishToFeed: Boolean = sys.env.getOrElse(PublishToFeed, "false").toBoolean
lazy val publishToVHD: Boolean = sys.env.getOrElse(PublishToVHD, "false").toBoolean

val ADOFeedTokenSecretName: String = "ado-feed-token"
val ADOFeedTokenEnvVarName: String = "ADO-FEED-TOKEN"
@@ -191,4 +192,5 @@ object Secrets {
val PypiApiSecretName: String = "pypi-api-token"
val PypiApiEnvVarName: String = "PYPI-API-TOKEN"
val PublishToFeed: String = "PUBLISH-TO-FEED"
val PublishToVHD: String = "PUBLISH-TO-VHD"
}
3 changes: 3 additions & 0 deletions sonatype.sbt
Original file line number Diff line number Diff line change
@@ -38,6 +38,9 @@ pgpPublicRing := Secrets.pgpPublicFile
// Select the publish destination from the flags in Secrets. publishToFeed wins
// over publishToVHD; with neither set, artifacts go to the Sonatype bundle.
// The feed URLs point directly at the Azure DevOps package feeds; they must not
// be routed through an intermediate proxy host.
if (Secrets.publishToFeed) {
  ThisBuild / publishTo := Some("SynapseML_PublicPackages" at
    "https://msdata.pkgs.visualstudio.com/A365/_packaging/SynapseML_PublicPackages/maven/v1")
} else if (Secrets.publishToVHD) {
  ThisBuild / publishTo := Some("BBC-VHD_PublicPackages" at
    "https://pkgs.dev.azure.com/msdata/A365/_packaging/BBC-VHD_PublicPackages/maven/v1")
} else {
  ThisBuild / publishTo := sonatypePublishToBundle.value
}
38 changes: 38 additions & 0 deletions tools/docker/build/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Build container for SynapseML: CBL-Mariner base image with micromamba,
# coursier (scala, scalac, sbt), msopenjdk-11 and the conda environments
# described by environment.yml and website/environment.yml.
FROM mcr.microsoft.com/onebranch/cbl-mariner/build:2.0

USER root

# Run every subsequent RUN step through a login bash shell.
SHELL ["/bin/bash", "--login", "-c"]

RUN mkdir -p /etc/micromamba

# Install the Azure Artifacts credential provider, then download micromamba,
# unpack it under /tmp (the archive contains bin/micromamba) and move the binary
# to /bin. URLs point at the upstream hosts directly; -f makes curl fail on an
# HTTP error instead of saving the error page as the archive.
RUN wget -qO- https://aka.ms/install-artifacts-credprovider.sh | bash && \
    curl -fLv https://micro.mamba.pm/api/micromamba/linux-64/latest -o /tmp/micromamba.tar.bz2 && \
    ls -lh /tmp/micromamba.tar.bz2 && \
    tar -xvjf /tmp/micromamba.tar.bz2 -C /tmp && \
    ls -lh /tmp && \
    mv /tmp/bin/micromamba /bin/micromamba && \
    /bin/micromamba shell init -s bash -p ~/micromamba && \
    /bin/micromamba config append channels conda-forge && \
    /bin/micromamba config set channel_priority strict

# Download the coursier launcher and use it to install scala, scalac and sbt.
# $PATH is expanded at build time here (double quotes), so the build-time PATH
# plus the coursier bin directory is what gets written to ~/.bashrc.
RUN curl -fLv https://github.com/coursier/coursier/releases/latest/download/cs-x86_64-pc-linux.gz | gzip -d > cs && chmod +x cs && ./cs setup --apps scala,scalac,sbt --yes && \
    echo "export PATH=$PATH:/root/.local/share/coursier/bin" >> ~/.bashrc

# System packages (JDK 11, vi, conda) and conda-pack for the system python3.9.
RUN tdnf -y update && \
    tdnf -y install msopenjdk-11 vi conda && \
    python3.9 -m pip install -U --force-reinstall charset-normalizer && \
    python3.9 -m pip install conda-pack && \
    tdnf clean all

RUN git config --global core.autocrlf true

# Create the main environment from the repository's environment.yml.
COPY environment.yml /etc/micromamba/environment.yml
RUN micromamba env create -f /etc/micromamba/environment.yml -y

# Create the website environment from website/environment.yml.
COPY website/environment.yml /etc/micromamba/website_environment.yml
RUN micromamba env create -f /etc/micromamba/website_environment.yml -y

# Activate the synapseml environment in shells that read ~/.bashrc.
RUN echo "micromamba activate synapseml" >> ~/.bashrc

WORKDIR /
38 changes: 38 additions & 0 deletions tools/docker/build/Dockerfile.offline
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Variant of the build container that takes the micromamba archive and the
# coursier launcher from a pre-downloaded packages directory (PACKAGES_DIR,
# relative to the build context) instead of downloading them.
FROM mcr.microsoft.com/onebranch/cbl-mariner/build:2.0

USER root

# Run every subsequent RUN step through a login bash shell.
SHELL ["/bin/bash", "--login", "-c"]

# Expected to contain micromamba.tar.bz2 and cs-x86_64-pc-linux.gz.
ARG PACKAGES_DIR
COPY ${PACKAGES_DIR} /pkgs

RUN mkdir -p /etc/micromamba

# Unpack micromamba under /tmp (the archive contains bin/micromamba) and move the
# binary to /bin. Extracting with -C /bin/micromamba cannot work: that directory
# does not exist, and the following mv reads from /tmp/bin.
# NOTE(review): the credential-provider installer is still fetched over the
# network here; confirm whether that is acceptable for the offline image.
RUN wget -qO- https://aka.ms/install-artifacts-credprovider.sh | bash && \
    tar -xvjf /pkgs/micromamba.tar.bz2 -C /tmp && \
    mv /tmp/bin/micromamba /bin/micromamba && \
    /bin/micromamba shell init -s bash -p ~/micromamba && \
    /bin/micromamba config append channels conda-forge && \
    /bin/micromamba config set channel_priority strict

# -c makes gzip write the decompressed launcher to stdout; plain `gzip -d FILE`
# decompresses in place and prints nothing, which would leave `cs` empty.
# $PATH is expanded at build time here (double quotes).
RUN gzip -dc /pkgs/cs-x86_64-pc-linux.gz > cs && chmod +x cs && ./cs setup --apps scala,scalac,sbt --yes && \
    echo "export PATH=$PATH:/root/.local/share/coursier/bin" >> ~/.bashrc

# System packages (JDK 11, vi, conda) and conda-pack for the system python3.9.
RUN tdnf -y update && \
    tdnf -y install msopenjdk-11 vi conda && \
    python3.9 -m pip install -U --force-reinstall charset-normalizer && \
    python3.9 -m pip install conda-pack && \
    tdnf clean all

RUN git config --global core.autocrlf true

# Create the main environment from the repository's environment.yml.
COPY environment.yml /etc/micromamba/environment.yml
RUN micromamba env create -f /etc/micromamba/environment.yml -y

# Create the website environment from website/environment.yml.
COPY website/environment.yml /etc/micromamba/website_environment.yml
RUN micromamba env create -f /etc/micromamba/website_environment.yml -y

# Activate the synapseml environment in shells that read ~/.bashrc.
RUN echo "micromamba activate synapseml" >> ~/.bashrc

WORKDIR /
5 changes: 5 additions & 0 deletions tools/docker/build/build.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env pwsh
# Builds the synapseml-host image from the Dockerfile next to this script, using
# the directory three levels above the script as the docker build context.
$ScriptPath = $MyInvocation.MyCommand.Path
$ScriptDir = Split-Path -Parent $ScriptPath
# Build paths with Join-Path and forward slashes rather than hard-coded
# backslashes: the -f argument is handed verbatim to the native docker command,
# and a backslash is not a path separator outside Windows.
$DockerContext = Resolve-Path (Join-Path $ScriptDir "../../..")
$DockerFile = Join-Path $ScriptDir "Dockerfile"
docker build -t synapseml-host -f $DockerFile $DockerContext
5 changes: 5 additions & 0 deletions tools/docker/build/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Build the synapseml-host image from the Dockerfile that sits next to this
# script; the build context is the directory three levels above the script.
script_dir=$(dirname "$(realpath "$0")")
context_dir=$(realpath "${script_dir}/../../..")
docker build -t synapseml-host -f "${script_dir}/Dockerfile" "${context_dir}"
60 changes: 35 additions & 25 deletions tools/esrp/prepare_jar.py
Original file line number Diff line number Diff line change
@@ -3,11 +3,6 @@
import shutil
import glob

current_username = getpass.getuser()

root_dir = f"/home/{current_username}/.ivy2/local/com.microsoft.azure/"


def find_second_level_folder(root):
# Walk through the root directory
for foldername, subfolders, filenames in os.walk(root):
@@ -18,10 +13,6 @@ def find_second_level_folder(root):
# Return None if no such folder is found
return None


version = find_second_level_folder(root_dir)


def flatten_dir(top_dir):
# Collect directories to delete
directories_to_delete = []
@@ -61,19 +52,38 @@ def flatten_dir(top_dir):
os.rmdir(directory)
print(f"Deleted: {directory}")


for top_dir in os.listdir(root_dir):
path_to_jars = os.path.join(root_dir, top_dir)
flatten_dir(path_to_jars)

for file in os.listdir(path_to_jars):
if "_2.12" in file and version not in file:
old_file_path = os.path.join(path_to_jars, file)
name_parts = file.split("_2.12")
if name_parts[1].startswith(".") or name_parts[1].startswith("-"):
sep_char = ""
else:
sep_char = "-"
new_file = f"{name_parts[0]}_2.12-{version}{sep_char}{name_parts[1]}"
new_file_path = os.path.join(path_to_jars, new_file)
shutil.move(old_file_path, new_file_path)
def prepare_jar(root_dir, version):
    """Flatten every directory under ``root_dir`` and add ``version`` to file names.

    Each entry of ``root_dir`` is passed to ``flatten_dir``. Afterwards every file
    in it whose name contains ``_2.12`` but not ``version`` is renamed so that
    ``-<version>`` follows the ``_2.12`` tag.

    Args:
        root_dir: Directory whose entries are the per-package directories.
        version: Version string to insert into the file names.
    """
    scala_tag = "_2.12"
    for top_dir in os.listdir(root_dir):
        path_to_jars = os.path.join(root_dir, top_dir)
        flatten_dir(path_to_jars)

        for file in os.listdir(path_to_jars):
            if scala_tag not in file or version in file:
                continue
            # Split on the first occurrence only, so a name containing the tag
            # more than once keeps everything after it.
            prefix, _, suffix = file.partition(scala_tag)
            # Insert a dash before the remainder unless it already starts with
            # a separator ("." or "-").
            sep_char = "" if suffix.startswith((".", "-")) else "-"
            new_file = f"{prefix}{scala_tag}-{version}{sep_char}{suffix}"
            shutil.move(
                os.path.join(path_to_jars, file),
                os.path.join(path_to_jars, new_file),
            )

if __name__ == "__main__":
    import argparse

    default_path = f"/home/{getpass.getuser()}/.ivy2/local/com.microsoft.azure/"

    parser = argparse.ArgumentParser(description="Prepare a jar for ESRP.")
    parser.add_argument(
        "--path",
        type=str,
        default=default_path,
        help="The root directory of the package, e.g. /home/<user>/.ivy2/local/com.microsoft.azure/",
    )

    # Parse the command line once and reuse the result.
    root_dir = parser.parse_args().path
    if root_dir != default_path:
        print("Non-default path passed: ", root_dir)

    version = find_second_level_folder(root_dir)
    if version is None:
        # find_second_level_folder returns None when it finds no matching
        # folder; stop with a clear message instead of failing inside
        # prepare_jar on the `version not in file` check.
        parser.error(f"could not determine a version folder under {root_dir}")

    print("Using root_dir: ", root_dir, " and version: ", version)
    prepare_jar(root_dir, version)
8 changes: 8 additions & 0 deletions website/README.md
Original file line number Diff line number Diff line change
@@ -2,6 +2,14 @@

This website is built using [Docusaurus 2](https://v2.docusaurus.io/), a modern static website generator.

### Environment

```
$ conda env create -f website/environment.yml
$ conda activate synapseml-website
$ cd website
```

### Installation

```
5 changes: 5 additions & 0 deletions website/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name: synapseml-website
channels:
- conda-forge
dependencies:
- conda-forge::yarn=1.22.19