{"config":{"visibility":"public","require_context":true,"workspace":"bioimage-io","flags":[],"singleton":false,"created_by":{"id":"wary-cicada-01833057"},"service_embedding":null,"authorized_workspaces":null},"id":"bioimage-io/bioengine-worker-8498678846-rcvsx-k8j4oqga:cellpose-finetuning","name":"Cellpose Fine-Tuning","type":"bioengine-app","description":"A BioEngine service for running Cellpose-SAM 4.0.7 inference and fine-tuning cell segmentation models.","docs":null,"app_id":"*","service_schema":{"debug_task_info":{"type":"function","function":{"name":"debug_task_info","description":"Debug information about the training task.","parameters":{"properties":{"session_id":{"description":"Session ID to debug","type":"string"}},"required":["session_id"],"type":"object"}}},"delete_training_session":{"type":"function","function":{"name":"delete_training_session","description":"Delete a training session.\n\nOnly the session owner can delete their own session. The session must\nnot be currently running.","parameters":{"properties":{"session_id":{"description":"Session ID to delete","type":"string"}},"required":["session_id"],"type":"object"}}},"export_model":{"type":"function","function":{"name":"export_model","description":"Export trained model as BioImage.io package to artifact manager.\n\nThis function packages the trained model with all necessary files for\nBioImage.io compatibility, including model weights, architecture code,\ntest samples, cover image, and RDF descriptor.\n\nArgs:\n    session_id: ID of completed training session\n    model_name: Custom name for the model (optional)\n    description: Optional model description text appended to the\n        default RDF description\n    collection: Target collection in format \"workspace/collection\"\n    authors: Optional list of model authors, each with required `name`\n        and optional `affiliation`\n    uploader: Optional uploader with required `name` and `email`\n\nReturns:\n    Dictionary containing:\n       
 - artifact_id: ID of uploaded artifact\n        - model_name: Name of the model\n        - status: Export status\n        - url: URL to view the model\n\nRaises:\n    ValueError: If session doesn't exist or training not completed\n    RuntimeError: If export or upload fails","parameters":{"properties":{"session_id":{"description":"Training session ID to export","type":"string"},"model_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Optional custom name for the model (defaults to cellpose-{session_id})"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Optional model description text appended to the default RDF description."},"authors":{"anyOf":[{"items":{"additionalProperties":true,"type":"object"},"type":"array"},{"type":"null"}],"default":null,"description":"Optional list of model authors. Each entry requires `name` and may include `affiliation`.","examples":[[{"affiliation":"KTH Royal Institute of Technology","name":"Nils Mechtel"}]]},"uploader":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"default":null,"description":"Optional uploader metadata with required `name` and `email`.","examples":[{"email":"nils.mech@gmail.com","name":"Nils Mechtel"}]},"collection":{"default":"bioimage-io/colab-annotations","description":"Collection to upload to (format: workspace/collection)","type":"string"}},"required":["session_id"],"type":"object"}}},"get_training_status":{"type":"function","function":{"name":"get_training_status","description":"Retrieve the current status of a training session.\n\nUse ``get_trained_model_path(session_id)`` to obtain the final saved model\npath once the session is completed.","parameters":{"properties":{"session_id":{"description":"Identifier returned by ``start_training``.","type":"string"}},"required":["session_id"],"type":"object"}}},"infer":{"type":"function","function":{"name":"infer","description":"Run Cellpose inference on artifact images 
and return encoded masks.\n\nImages are fetched from the specified artifact into a local cache as\nneeded. For each input path, the corresponding mask array is returned as\nan NPY-serialized base64 payload suitable for cross-language transport.\n\nOptionally, flows can be returned by setting return_flows=True, which includes\nflow visualization, XY flows, cell probability, and final pixel positions.","parameters":{"properties":{"artifact":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Artifact identifier 'workspace/alias' containing the source images."},"image_paths":{"anyOf":[{"items":{"type":"string"},"minItems":1,"type":"array"},{"type":"null"}],"default":null,"description":"List of artifact-relative image paths to segment"},"input_arrays":{"anyOf":[{"items":{},"minItems":1,"type":"array"},{"type":"null"}],"default":null,"description":"List of numpy ndarrays to segment","examples":[[[0,0,0],[255,255,255]]]},"model":{"default":"cpsam","description":"Identifier of the Cellpose model to use for inference. Either a built-in pretrained model name, a local session ID of a finetuned model, or a published model artifact id/url exported by this deployment.","examples":["abc123ef-4567-890a-bcde-f1234567890a","bioimage-io/cellpose-sam-58a077a9-20260318021633","cpsam"],"type":"string"},"diameter":{"anyOf":[{"minimum":0,"type":"number"},{"type":"null"}],"default":null,"description":"Approximate object diameter; if None, Cellpose will estimate it"},"flow_threshold":{"default":0.4,"description":"Flow error threshold for dynamics. Higher values allow more masks. Decrease if too many ill-shaped ROIs are returned.","minimum":0,"type":"number"},"cellprob_threshold":{"default":0.0,"description":"Cell probability threshold. Decrease to find more ROIs, increase to filter out dim areas.","type":"number"},"niter":{"anyOf":[{"minimum":0,"type":"integer"},{"type":"null"}],"default":null,"description":"Number of iterations for flow dynamics. 
If None, automatically set based on diameter. Use higher values (e.g., 250) for better convergence."},"return_flows":{"default":false,"description":"If True, return flows in addition to masks. Flows include: [0] HSV flow visualization, [1] XY flows at each pixel, [2] cell probability map, [3] final pixel locations after dynamics.","type":"boolean"},"json_safe":{"default":false,"description":"If True, return JSON-safe nested arrays for masks/flows instead of numpy arrays. Useful for browser clients.","type":"boolean"},"enable_clahe":{"default":false,"description":"Apply CLAHE preprocessing to input images before inference. Recommended for brightfield or phase-contrast microscopy images. Converts to grayscale and applies contrast-limited adaptive histogram equalization (clipLimit=3.0, tileGridSize=16×16).","type":"boolean"}},"type":"object"}}},"list_models_by_dataset":{"type":"function","function":{"name":"list_models_by_dataset","description":"List all models trained on a specific dataset.\n\nThis function queries the artifact manager to find all model artifacts\nthat were trained on the specified dataset by checking the\ntraining_dataset_id in the artifact config. Only searches the\nbioimage-io/colab-annotations collection.\n\nArgs:\n    dataset_id: ID of the dataset artifact\n\nReturns:\n    List of model artifacts, each containing:\n        - id: Model artifact ID\n        - name: Model name\n        - created_at: Creation timestamp\n        - url: URL to view the model\n\nRaises:\n    RuntimeError: If query fails","parameters":{"properties":{"dataset_id":{"description":"Dataset artifact ID to find models trained on it","type":"string"}},"required":["dataset_id"],"type":"object"}}},"list_training_sessions":{"type":"function","function":{"name":"list_training_sessions","description":"List all known training sessions with their current or final status.\n\nIncludes running sessions tracked in-memory and finished sessions recorded in\nthe session history. 
For completed sessions, the saved model path is included\nif available.","parameters":{"properties":{"status_types":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Optional list of statuses to include. Values must be among {running, stopped, completed, failed, unknown}.","examples":[["running","completed"]]},"dataset_artifact_ids":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Optional list of dataset artifact IDs to filter sessions.","examples":[["ri-scale/zarr-demo","ri-scale/another-dataset"]]},"labels":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Optional list of annotation labels to filter sessions (e.g. ['cells']).","examples":[["cells"],["nucleus","cells"]]},"limit":{"default":50,"description":"Maximum number of sessions to return (most recent first)","type":"integer"}},"type":"object"}}},"restart_training":{"type":"function","function":{"name":"restart_training","description":"Restart a stopped/interrupted/failed training session.\n\nA new session is created. If a checkpoint exists for ``session_id``, the\nnew run uses that checkpoint as the starting model.","parameters":{"properties":{"session_id":{"description":"Session ID to restart from","type":"string"},"n_epochs":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Optional epoch override for restarted run"}},"required":["session_id"],"type":"object"}}},"start_training":{"type":"function","function":{"name":"start_training","description":"Start asynchronous finetuning of a Cellpose model on an artifact dataset.\n\nThis downloads metadata and the referenced image/annotation files from the\ngiven Hypha artifact to a local cache and launches training in the\nbackground. 
Use ``get_training_status`` to poll progress or ``stop_training``\nto cancel.","parameters":{"properties":{"artifact":{"description":"Artifact identifier 'workspace/alias' containing TIFF images and annotations for training.","examples":["ri-scale/zarr-demo"],"type":"string"},"train_images":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Path to training images. Can be either:\n1. Folder path ending with '/' (assumes same filenames as annotations)\n2. Path pattern with wildcard (e.g., 'images/*/*.tif').\nOptional if metadata_dir is provided.","examples":["images/108bb69d-2e52-4382-8100-e96173db24ee/","images/*/*.tif"]},"train_annotations":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Path to training annotations. Can be either:\n1. Folder path ending with '/' (assumes same filenames as images)\n2. Path pattern with wildcard (e.g., 'annotations/folder/*_mask.ome.tif')\nThe * part in patterns must match between images and annotations. Optional if metadata_dir is provided.","examples":["annotations/108bb69d-2e52-4382-8100-e96173db24ee/","annotations/folder/*_mask.ome.tif"]},"metadata_dir":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Optional metadata directory containing JSON files with image/annotation paths (e.g., image_path and mask_path).","examples":["metadata/"]},"test_images":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Optional path to test images. Same format as train_images. Providing test data enables per-epoch pixel-level validation metrics and end-of-training instance segmentation metrics (AP@0.5/0.75/0.9).","examples":["images/test/","images/test/*.ome.tif"]},"test_annotations":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Optional path to test annotations (label masks). Same format as train_annotations. 
Required together with test_images.","examples":["annotations/test/","annotations/test/*_mask.ome.tif"]},"model":{"default":"cpsam","description":"Name of Cellpose model to finetune. Either a builtin pretrained Cellpose model name, the session ID of a previously finetuned model, or a published model artifact id/url exported by this deployment.","examples":["abc123ef-4567-890a-bcde-f1234567890a","bioimage-io/cellpose-sam-58a077a9-20260318021633","cpsam"],"type":"string"},"n_samples":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Optional number of samples to use from the dataset. If None, all available samples are used."},"n_epochs":{"default":10,"description":"Number of training epochs","type":"integer"},"learning_rate":{"default":1e-06,"description":"Learning rate","type":"number"},"weight_decay":{"default":0.0001,"description":"Weight decay","type":"number"},"min_train_masks":{"default":5,"description":"Minimum number of masks per training batch. Lower values speed up training, useful for quick testing with large sample sets. Cellpose default is 5.","type":"integer"},"validation_interval":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Epochs between validation evaluations. Always validates on the first epoch. Default (None) validates every 10 epochs. Set to 1 for every epoch. Requires test_images and test_annotations."},"enable_clahe":{"default":false,"description":"Apply CLAHE (Contrast Limited Adaptive Histogram Equalization) preprocessing to training and test images before training. Recommended for brightfield or phase-contrast microscopy images where raw pixel values span a narrow range (e.g. 19–81 out of 255). Without CLAHE, Cellpose-SAM cannot detect cells in such images.","type":"boolean"},"rescale":{"default":false,"description":"Whether to rescale images by estimated cell diameter during training. 
When True, Cellpose estimates a per-image diameter and rescales so cells match the model's expected size — this can substantially improve fine-tuning on datasets where cell size differs from the default training distribution (e.g. large plant cells, small bacteria). When False (default, per the Cellpose-SAM paper), no diameter-based rescaling is applied. Does not affect inference; use the 'diameter' parameter in infer() to control rescaling at inference time.","type":"boolean"},"label":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Annotation label to use for training (e.g. 'cells'). Saved in session metadata and used to filter sessions. Should match the mask folder name: masks_{label}/ in the artifact.","examples":["cells","nucleus"]}},"required":["artifact"],"type":"object"}}},"stop_training":{"type":"function","function":{"name":"stop_training","description":"Stop an ongoing training session.","parameters":{"properties":{"session_id":{"description":"Identifier returned by ``start_training``.","type":"string"}},"required":["session_id"],"type":"object"}}},"get_load":{"type":"function","function":{"name":"get_load","description":"Returns the current load of the BioEngine application service as a float value between 0 and 1.\nThis method is used by Hypha's load balancing system to distribute requests\nacross multiple service instances and for monitoring service capacity.\n\nLoad Calculation:\n- 0.0: No active requests, service is idle and ready to handle new requests\n- 1.0: Maximum capacity reached, all semaphore slots occupied\n- Values between 0 and 1 indicate partial load based on active request ratio\n\nThe load is calculated based on the number of active requests being processed\nthrough the service semaphore, which limits concurrent request processing to prevent overload.\n\nReturns:\n    float: Current service load between 0.0 (idle) and 1.0 (at 
capacity)","parameters":{"properties":{},"required":["context"],"type":"object"}}},"get_num_pcs":{"type":"function","function":{"name":"get_num_pcs","description":"Returns the current number of active WebRTC peer connections for this BioEngine application.\nThis method is used for monitoring connection status, debugging WebRTC connectivity issues,\nand understanding real-time usage patterns of the application.\n\nWebRTC peer connections enable direct peer-to-peer communication between clients and the\nBioEngine application, bypassing traditional server-mediated communication for better\nperformance with large data transfers.\n\nConnection States Tracked:\n- Only counts connections in \"connected\" state\n- Excludes failed, closed, or disconnected connections\n- Updates automatically as connections are established or terminated\n\nReturns:\n    int: Number of currently active WebRTC peer connections (0 or positive integer)","parameters":{"properties":{},"required":["context"],"type":"object"}}},"get_rtc_service_id":{"type":"function","function":{"name":"get_rtc_service_id","description":"Returns the registered WebRTC service identifier if the WebRTC service is successfully registered.\nThis method is used to verify WebRTC service availability and obtain the service ID for\ndirect peer-to-peer connections to the BioEngine application.\n\nWebRTC Service Registration:\n- WebRTC services are registered separately from the main WebSocket service\n- Service ID follows the pattern: \"{application_id}-rtc\"\n- Registration may fail due to network issues or server constraints\n- Main application remains functional even if WebRTC registration fails\n\nUse Cases:\n- Verify WebRTC capability before attempting peer-to-peer connections\n- Troubleshoot WebRTC service registration issues\n- Provide service discovery information to clients\n\nReturns:\n    Optional[str]: WebRTC service ID string if registered, None if registration failed or 
pending","parameters":{"properties":{},"required":["context"],"type":"object"}}}},"_rtype":"pydantic-model"}