Skip to content

Commit

Permalink
feat(computer-vision): add open clip (#22084)
Browse files Browse the repository at this point in the history
  • Loading branch information
hongbo-miao authored Dec 29, 2024
1 parent 9907f9e commit e6a8b89
Show file tree
Hide file tree
Showing 23 changed files with 1,180 additions and 1 deletion.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/.static-type-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ jobs:
uv run poe static-type-check-python --package=cloud-platform.aws.amazon-sagemaker.pytorch-mnist
uv run poe static-type-check-python --package=cloud-platform.aws.aws-parallelcluster.pcluster
uv run poe static-type-check-python --package=computer-vision.hm-imagebind
uv run poe static-type-check-python --package=computer-vision.open-clip
uv run poe static-type-check-python --package=computer-vision.hm-open3d
uv run poe static-type-check-python --package=computer-vision.hm-pyvista.mount-saint-helens
uv run poe static-type-check-python --package=data-analytics.hm-cudf
Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
hm-streamlit-live-line-chart: ${{ steps.filter.outputs.hm-streamlit-live-line-chart }}
hm-streamlit-map: ${{ steps.filter.outputs.hm-streamlit-map }}
hm-imagebind: ${{ steps.filter.outputs.hm-imagebind }}
open-clip: ${{ steps.filter.outputs.open-clip }}
hm-jax: ${{ steps.filter.outputs.hm-jax }}
hm-kubeflow-calculate: ${{ steps.filter.outputs.hm-kubeflow-calculate }}
hm-kubeflow-classify-mnist: ${{ steps.filter.outputs.hm-kubeflow-classify-mnist }}
Expand Down Expand Up @@ -272,6 +273,9 @@ jobs:
hm-imagebind:
- '.github/workflows/test.yml'
- 'computer-vision/hm-imagebind/**'
open-clip:
- '.github/workflows/test.yml'
- 'computer-vision/open-clip/**'
hm-jax:
- '.github/workflows/test.yml'
- 'high-performance-computing/hm-jax/**'
Expand Down Expand Up @@ -2481,6 +2485,39 @@ jobs:
with:
directory: computer-vision/hm-imagebind

open-clip-test:
name: OpenCLIP | Test
needs: detect-changes
if: ${{ needs.detect-changes.outputs.open-clip == 'true' }}
runs-on: ubuntu-24.04
environment: test
timeout-minutes: 10
steps:
- name: Checkout
uses: actions/[email protected]
- name: Install uv
uses: astral-sh/[email protected]
with:
version: 0.5.11
enable-cache: true
cache-dependency-glob: computer-vision/open-clip/uv.lock
- name: Set up Python
uses: actions/[email protected]
with:
python-version-file: computer-vision/open-clip/pyproject.toml
- name: Install dependencies
working-directory: computer-vision/open-clip
run: |
uv sync --dev
- name: Test
working-directory: computer-vision/open-clip
run: |
uv run poe test-coverage
- name: Upload coverage to Codecov
uses: codecov/[email protected]
with:
directory: computer-vision/open-clip

open3d-test:
name: Open3D | Test
needs: detect-changes
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions .markdownlint-cli2.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh",
"computer-vision/hm-imagebind/.checkpoints",
"computer-vision/hm-imagebind/data",
"computer-vision/open-clip/data",
"cloud-security/hm-prowler/output",
"data-analytics/hm-geopandas/data",
"data-ingestion/fluent-bit/*/data",
Expand Down
3 changes: 3 additions & 0 deletions .mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ pull_request_rules:
- or:
- check-success=ImageBind | Test
- check-skipped=ImageBind | Test
- or:
- check-success=OpenCLIP | Test
- check-skipped=OpenCLIP | Test
- or:
- check-success=Open3D | Test
- check-skipped=Open3D | Test
Expand Down
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ AllCops:
- 'computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh/**/*'
- 'computer-vision/hm-imagebind/.checkpoints/**/*'
- 'computer-vision/hm-imagebind/data/**/*'
- 'computer-vision/open-clip/data/**/*'
- 'data-analytics/hm-geopandas/data/**/*'
- 'data-ingestion/fluent-bit/*/data/**/*'
- 'data-ingestion/vector/*/data/**/*'
Expand Down
1 change: 1 addition & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ exclude = [
"computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh",
"computer-vision/hm-imagebind/.checkpoints",
"computer-vision/hm-imagebind/data",
"computer-vision/open-clip/data",
"data-analytics/hm-geopandas/data",
"data-ingestion/fluent-bit/*/data",
"data-ingestion/vector/*/data",
Expand Down
1 change: 1 addition & 0 deletions .solhintignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions .stylelintignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions .textlintignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ computational-fluid-dynamics/openfoam/simulations/*/0.*/**/*
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh/**/*
computer-vision/hm-imagebind/.checkpoints/**/*
computer-vision/hm-imagebind/data/**/*
computer-vision/open-clip/data/**/*
data-analytics/hm-geopandas/data/**/*
data-ingestion/fluent-bit/*/data/**/*
data-ingestion/vector/*/data/**/*
Expand Down
1 change: 1 addition & 0 deletions .yamllint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ ignore: |
computational-fluid-dynamics/openfoam/simulations/*/constant/polyMesh
computer-vision/hm-imagebind/.checkpoints
computer-vision/hm-imagebind/data
computer-vision/open-clip/data
data-analytics/hm-geopandas/data
data-ingestion/fluent-bit/*/data
data-ingestion/vector/*/data
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ static-type-check-python:
uv run poe static-type-check-python --package=cloud-platform.aws.amazon-sagemaker.pytorch-mnist
uv run poe static-type-check-python --package=cloud-platform.aws.aws-parallelcluster.pcluster
uv run poe static-type-check-python --package=computer-vision.hm-imagebind
uv run poe static-type-check-python --package=computer-vision.open-clip
uv run poe static-type-check-python --package=computer-vision.hm-open3d
uv run poe static-type-check-python --package=computer-vision.hm-pyvista.mount-saint-helens
uv run poe static-type-check-python --package=data-analytics.hm-cudf
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,8 @@ The diagram illustrates the repository's architecture, which is considered overl

## Computer Vision

- **ImageBind** - Multimodal embedding model for images, video, audio, text, depth, thermal, and inertial measurement units (IMUs)
- **ImageBind** - Multimodal embedding model for images, text, audio, video, depth, thermal, and inertial measurement units (IMUs)
- **OpenCLIP** - Multimodal embedding model for images and text
- **OpenCV** - Computer vision library
- **supervision** - Computer vision library
- **Ultralytics YOLOv8** - Object detection model
Expand Down
16 changes: 16 additions & 0 deletions computer-vision/open-clip/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# None of these targets produce a file named after themselves.
.PHONY: uv-install-python uv-update-lock-file uv-install-dependencies \
	uv-run-dev uv-run-test uv-run-test-coverage download-dataset

# Install Python through uv.
uv-install-python:
	uv python install
# Re-resolve dependencies and rewrite uv.lock.
uv-update-lock-file:
	uv lock
# Install the project dependencies, including the dev group.
uv-install-dependencies:
	uv sync --dev

# Run the poe tasks declared in pyproject.toml.
uv-run-dev:
	uv run poe dev
uv-run-test:
	uv run poe test
uv-run-test-coverage:
	uv run poe test-coverage

# Download the pets image archive and unpack it under data/.
# - `mkdir -p` is needed because tar refuses to change into a missing directory.
# - `--gzip --file=-` is explicit because GNU tar cannot auto-detect the
#   compression format when the archive arrives on a pipe.
download-dataset:
	mkdir -p data
	curl --fail --show-error --location https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz | tar --extract --gzip --file=- --directory=data
Empty file.
26 changes: 26 additions & 0 deletions computer-vision/open-clip/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[project]
# Project name; matches the directory (computer-vision/open-clip).
# Regenerate uv.lock with `uv lock` after changing it.
name = "open-clip"
version = "1.0.0"
requires-python = "~=3.12.0"
dependencies = [
  # NOTE(review): pyvista is not imported by src/main.py and looks like a
  # leftover from another project's pyproject.toml — confirm and remove.
  "pyvista==0.44.2",
  "lancedb==0.17.0",
  "open-clip-torch==2.29.0",
  "pillow==11.0.0",
  "pandas==2.2.3",
]

[dependency-groups]
dev = [
  "poethepoet==0.32.0",
  "pytest==8.3.4",
  "pytest-cov==6.0.0",
]

[tool.uv]
# This is an application, not an installable package.
package = false

[tool.poe.tasks]
dev = "python src/main.py"
# --verbose is passed twice on purpose: pytest raises verbosity one level per flag.
test = "pytest --verbose --verbose"
test-coverage = "pytest --cov=. --cov-report=xml"
3 changes: 3 additions & 0 deletions computer-vision/open-clip/src/dummy_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class TestDummy:
    """Placeholder test case."""

    def test_dummy(self):
        """Check that adding one and one gives two."""
        expected = 2
        actual = 1 + 1
        assert actual == expected
62 changes: 62 additions & 0 deletions computer-vision/open-clip/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import logging
from pathlib import Path
from random import sample

import lancedb
import pandas as pd
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
from PIL import Image

# Configure the root logger so INFO-level records and above are emitted.
logging.basicConfig(level=logging.INFO)

# Embedding function looked up in LanceDB's embedding registry under the
# "open-clip" key. It is created once at import time with the ViT-B-32 model
# name, the laion2b_s34b_b79k pretrained tag, a batch size of 64, and the CPU
# device. The Pets schema and main() both rely on this object.
EMBEDDINGS = (
    get_registry()
    .get("open-clip")
    .create(
        name="ViT-B-32", pretrained="laion2b_s34b_b79k", batch_size=64, device="cpu"
    )
)


class Pets(LanceModel):
    """LanceDB table schema holding an image URI and a vector for it."""

    # Vector column whose dimension comes from EMBEDDINGS.ndims().
    # NOTE(review): presumably LanceDB fills this from image_uri via the
    # embedding function — confirm against the LanceDB embedding docs.
    vector: Vector(EMBEDDINGS.ndims()) = EMBEDDINGS.VectorField()  # type: ignore
    # Location of the image, declared as the embedding function's source field.
    image_uri: str = EMBEDDINGS.SourceField()


def _sample_image_uris(image_dir: Path, sample_size: int) -> list[str]:
    """Return up to *sample_size* randomly chosen ``*.jpg`` paths in *image_dir*.

    Args:
        image_dir: Directory searched (non-recursively) for ``*.jpg`` files.
        sample_size: Maximum number of paths to return.

    Returns:
        The sampled paths as strings, with no duplicates.

    Raises:
        FileNotFoundError: If *image_dir* holds no ``*.jpg`` files, including
            when the directory does not exist.
    """
    uris = [str(path) for path in image_dir.glob("*.jpg")]
    if not uris:
        raise FileNotFoundError(
            f"No .jpg images found in {image_dir}; run `make download-dataset` first"
        )
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request at the population size.
    return sample(uris, min(sample_size, len(uris)))


def _log_results(label: str, results) -> None:
    """Log the ``image_uri`` of each search result with a 1-based rank."""
    for rank, result in enumerate(results, start=1):
        logging.info("%s search result %d: %s", label, rank, result.image_uri)


def main() -> None:
    """Build (or reuse) the ``pets`` table, then run a text and an image query.

    The table lives in the LanceDB database at ``/tmp/lancedb``. When it does
    not exist yet, up to 1000 images from ``data/images`` are sampled and
    added. Each query logs its top three matches.

    Raises:
        FileNotFoundError: If the table must be created but ``data/images``
            holds no ``*.jpg`` files, or if the query image is missing.
    """
    db = lancedb.connect("/tmp/lancedb")

    # Create or get the table
    if "pets" in db:
        logging.info("Using existing table")
        table = db["pets"]
    else:
        logging.info("Creating new table")
        # Use a sampling of images from the specified directory.
        # Collect them BEFORE creating the table: otherwise a missing dataset
        # leaves an empty "pets" table that later runs would silently reuse.
        image_dir = Path("data/images")
        logging.info("Loading images from directory: %s", image_dir)
        uris = _sample_image_uris(image_dir, 1000)
        table = db.create_table("pets", schema=Pets, mode="overwrite")
        logging.info("Processing %d images", len(uris))
        table.add(pd.DataFrame({"image_uri": uris}))

    # Query using text
    query_text = "black cat"
    logging.info("Performing text search with query: '%s'", query_text)
    search_results = table.search(query_text).limit(3).to_pydantic(Pets)
    _log_results("Text", search_results)

    # Query using an image
    query_image_path = Path("data/images/samoyed_27.jpg")
    logging.info("Performing image search with query image: %s", query_image_path)
    # Keep the file open only while the query runs, so the handle is released.
    with Image.open(query_image_path) as query_image:
        search_results = table.search(query_image).limit(3).to_pydantic(Pets)
    _log_results("Image", search_results)


if __name__ == "__main__":
    main()
Loading

0 comments on commit e6a8b89

Please sign in to comment.