Exploring Python Built-in Functions in Clojure
Table of Contents
Examples
;; REPL scratchpad for the python.builtins namespace.
;; `comment` is the conventional wrapper for forms that should be readable but
;; never evaluated when the file loads; send individual forms to the REPL.
(comment
  ;; Two aliases for the same namespace; every example below uses `python`.
  (require '[python.builtins :as python])
  (require '[python.builtins :as p])

  ;; Look up documentation for a few builtins.
  (doc python/list)
  (doc python/dict)
  (doc python/set)
  (doc python/tuple)
  (doc python/abs)

  ;; Calls with concrete arguments.
  (python/abs -2)
  (python/all [])
  (python/any [])
  (python/ascii 'a)
  (python/bin 4)
  (python/bool true)
  (python/bytes 8)
  (python/callable 'a)
  (python/chr 16)
  (python/compile "main.py" "main.py" "single")
  (python/complex 2)
  (python/copyright)
  (python/credits)
  (python/delattr (python/dict {"a" "b"}) "a")
  (python/delattr {"a" 1} "a")
  (python/dict {"a" "b"})

  ;; Placeholder calls: no arguments have been filled in yet.
  ;; NOTE(review): several of these Python builtins (divmod, len, isinstance,
  ;; setattr, ...) require arguments, so supply some before evaluating.
  (python/dir)
  (python/divmod)
  (python/enumerate)
  (python/eval)
  (python/hex)
  (python/id)
  (python/input)
  (python/int)
  (python/isinstance)
  (python/len)
  (python/license)
  (python/list)
  (python/locals)
  (python/map)
  (python/max)
  (python/memoryview)
  (python/min)
  (python/next)
  (python/object)
  (python/open)
  (python/ord)
  (python/pow)
  (python/print)
  (python/property)
  ;; NOTE(review): Python's quit() raises SystemExit; evaluate with care when
  ;; the interpreter is embedded in the JVM process.
  (python/quit)
  (python/range)
  (python/repr)
  (python/reversed)
  (python/round)
  (python/set)
  (python/setattr)
  (python/slice)
  (python/sorted)
  (python/super)
  (python/tuple)
  (python/type)
  (python/vars)
  (python/zip)
  )
Modern ML Library Integration
PyTorch Integration
Working with PyTorch tensors directly from Clojure enables deep learning workflows while leveraging Clojure's functional paradigms.
(require '[libpython-clj2.require :refer [require-python]])
(require '[libpython-clj2.python :as py :refer [py. py.. py.-]])

;; Import PyTorch.
;; torch.cuda gets its own alias: a dotted symbol such as
;; torch/cuda.is_available is read as a single var named "cuda.is_available"
;; in the torch namespace, which does not exist.
(require-python '[torch :as torch])
(require-python '[torch.cuda :as cuda])
(require-python '[torch.nn :as nn])
(require-python '[torch.optim :as optim])

;; Create tensors
(def x (torch/tensor [[1.0 2.0] [3.0 4.0]]))
(def y (torch/tensor [[5.0 6.0] [7.0 8.0]]))

;; Matrix operations
(torch/matmul x y)
(torch/add x y)

;; GPU support (if available).
;; The def is kept at top level; x-gpu is nil when CUDA is not available.
(def x-gpu
  (when (cuda/is_available)
    (py. x cuda)))

;; Simple neural network.
;; `nn` is a namespace alias, not a value, so Sequential is called through the
;; alias instead of being looked up as an attribute of `nn`.
(def model
  (nn/Sequential (nn/Linear 784 256)
                 (nn/ReLU)
                 (nn/Linear 256 10)))

;; Forward pass
(def input (torch/randn [1 784]))
(def output (py. model __call__ input))
Hugging Face Transformers
Access state-of-the-art NLP models through the transformers library.
(require-python '[transformers :as hf])

;; Load a pre-trained model and tokenizer.
;; from_pretrained is a class method, so it is invoked on the class object with
;; `py.`; a dotted symbol like hf/AutoTokenizer.from_pretrained does not
;; resolve to any var.
(def tokenizer (py. hf/AutoTokenizer from_pretrained "bert-base-uncased"))
(def model (py. hf/AutoModel from_pretrained "bert-base-uncased"))

;; Tokenize text
(def inputs
  (py. tokenizer __call__ "Hello, Clojure meets Python!" :return_tensors "pt"))

;; Get embeddings.
;; The tokenizer output is unpacked explicitly into keyword arguments.
;; NOTE(review): only input_ids and attention_mask are forwarded; confirm that
;; is sufficient for the model in use.
(def outputs
  (py. model __call__
       :input_ids (py/get-item inputs "input_ids")
       :attention_mask (py/get-item inputs "attention_mask")))
(def last-hidden (py.- outputs last_hidden_state))

;; Text generation with GPT-2
(def generator (hf/pipeline "text-generation" :model "gpt2"))
(generator "Clojure is a functional programming language that")

;; Sentiment analysis
(def classifier (hf/pipeline "sentiment-analysis"))
(classifier "libpython-clj2 makes Python integration seamless!")
LangChain Integration
Build LLM-powered applications using LangChain from Clojure.
;; NOTE(review): these are the module paths used by the original example;
;; confirm they match the installed LangChain version.
(require-python '[langchain.llms :as llms])
(require-python '[langchain.chains :as chains])
(require-python '[langchain.prompts :as prompts])
(require-python '[langchain.text_splitter :as splitter])
(require-python '[langchain.embeddings :as embeddings])
(require-python '[langchain.vectorstores :as vectorstores])

;; Initialize an LLM
(def llm (llms/OpenAI :temperature 0.7))

;; Create a prompt template
(def prompt
  (prompts/PromptTemplate
   :input_variables ["topic"]
   :template "Write a brief explanation of {topic} for a Clojure developer."))

;; Create a chain
(def chain (chains/LLMChain :llm llm :prompt prompt))

;; Run the chain
(py. chain run "functional programming patterns")

;; Document loading and RAG setup
(def text-splitter
  (splitter/RecursiveCharacterTextSplitter :chunk_size 1000 :chunk_overlap 200))
(def embeddings-model (embeddings/OpenAIEmbeddings))

;; Create vector store from documents
(defn create-knowledge-base
  "Splits `documents` with `text-splitter` and builds a FAISS vector store from
  the resulting chunks using `embeddings-model`."
  [documents]
  (let [chunks (py. text-splitter split_documents documents)]
    ;; from_documents is a class method: call it on the FAISS class object.
    ;; The dotted symbol vectorstores/FAISS.from_documents resolves to no var.
    (py. vectorstores/FAISS from_documents chunks embeddings-model)))
Clojure Data Science Stack Integration
dtype-next Integration
dtype-next provides high-performance array processing that interoperates with Python arrays and tensors.
;; Each form is on its own line so the `;;` comments no longer swallow the
;; code that follows them.
(require '[tech.v3.datatype :as dtype])
(require '[tech.v3.tensor :as dtt])
(require '[libpython-clj2.python :as py])

;; Convert between Clojure and NumPy arrays
(require-python '[numpy :as np])

;; Create a dtype-next tensor
(def clj-tensor (dtt/->tensor [[1.0 2.0 3.0] [4.0 5.0 6.0]]))

;; Convert to NumPy array (zero-copy when possible)
;; NOTE(review): confirm that py/->py-array exists in the libpython-clj2
;; version in use.
(def np-array (py/->py-array clj-tensor))

;; Perform NumPy operations
(def result (np/dot np-array (np/transpose np-array)))

;; Convert back to Clojure
(def clj-result (dtype/->array-buffer result))

;; Direct interop with PyTorch tensors
;; (assumes the `torch` alias from the PyTorch section is already loaded)
(def torch-tensor (torch/from_numpy np-array))
(def clj-from-torch (dtype/->array-buffer (py. torch-tensor numpy)))
tech.ml.dataset Integration
Seamlessly work with tabular data across Clojure and Python.
(require '[tech.v3.dataset :as ds])
(require-python '[pandas :as pd])
(require-python '[sklearn.model_selection :as model-sel])
(require-python '[sklearn.ensemble :as ensemble])

;; Load data with tech.ml.dataset
(def dataset (ds/->dataset "data.csv"))

;; Convert to Pandas DataFrame
(defn dataset->pandas
  "Builds a pandas DataFrame from the dataset `ds`.

  Each column is copied into a Clojure vector and keyed by its name as a
  string. Column order is passed explicitly through :columns because a
  Clojure map does not guarantee ordering."
  [ds]
  ;; The local `ds` is the dataset; `ds/column-names` still resolves through
  ;; the namespace alias.
  (let [col-names (ds/column-names ds)
        py-name   (fn [k] (if (keyword? k) (name k) (str k)))]
    (pd/DataFrame (into {}
                        (map (fn [k] [(py-name k) (vec (ds k))]))
                        col-names)
                  :columns (mapv py-name col-names))))

(def df (dataset->pandas dataset))

;; Use scikit-learn for ML.
;; `drop` is a method call with arguments, so it goes through `py.`;
;; py/get-attr only reads an attribute.
(def X (py. df drop "target" :axis 1))
(def y (py/get-attr df "target"))

;; Train-test split.
;; `def` cannot destructure, so the result is bound once and each part is
;; pulled out by position.
(def split (model-sel/train_test_split X y :test_size 0.2 :random_state 42))
(def X-train (nth split 0))
(def X-test (nth split 1))
(def y-train (nth split 2))
(def y-test (nth split 3))

;; Train a model
(def rf (ensemble/RandomForestClassifier :n_estimators 100))
(py. rf fit X-train y-train)

;; Predictions
(def predictions (py. rf predict X-test))

;; Convert predictions back to Clojure
(def clj-predictions (vec (py/->jvm predictions)))
Hybrid Data Pipeline Example
(require '[tech.v3.dataset :as ds])
(require '[tech.v3.dataset.column :as col])
(require-python '[pandas :as pd])
(require-python '[torch.utils.data :as torch-data])

;; Defined before process-pipeline because Clojure resolves symbols at compile
;; time, so a function must exist before the code that calls it.
(defn pandas->dataset
  "Converts the pandas DataFrame `df` to a dataset via df.to_dict(\"list\")."
  [df]
  ;; NOTE(review): confirm ds/->dataset accepts the bridged Python dict
  ;; directly; it may need py/->jvm first.
  (ds/->dataset (py. df to_dict "list")))

;; Define a data processing pipeline that uses both ecosystems
(defn process-pipeline
  "Loads the file at `input-path`, keeps rows whose :value is positive, adds a
  :log-value column, round-trips through pandas and returns a dataset.

  Relies on `dataset->pandas`, which is defined in the tech.ml.dataset
  section."
  [input-path]
  ;; Step 1: Load with tech.ml.dataset (fast Clojure CSV parsing)
  ;; NOTE(review): the code below addresses the column as :value; confirm the
  ;; loaded dataset uses keyword column names.
  (let [raw-ds (ds/->dataset input-path)
        ;; Step 2: Clojure data transformations
        positive-ds (ds/filter-column raw-ds :value #(> % 0))
        ;; The log is taken over the filtered rows, so the new column has the
        ;; same length as the dataset and never sees a non-positive value.
        cleaned-ds (assoc positive-ds
                          :log-value
                          (map #(Math/log %) (positive-ds :value)))
        ;; Step 3: Convert to Pandas for Python-specific ops
        df (dataset->pandas cleaned-ds)
        ;; Step 4: Use Pandas rolling window
        ;; NOTE(review): this stores the rolling mean as an attribute on the
        ;; DataFrame, not as a column; confirm whether a column was intended.
        df-with-rolling (py/set-attr! df "rolling_mean"
                                      (py. (py. df rolling 7) mean))]
    ;; Step 5: Return as tech.ml.dataset
    (pandas->dataset df-with-rolling)))
Performance Considerations
Memory Management
(require '[libpython-clj2.python :as py])

;; Explicit garbage collection coordination
(defn with-python-gc
  "Calls the zero-argument function `f` and returns its result. Python's
  gc.collect() is run afterwards, whether or not `f` throws."
  [f]
  (try
    (f)
    (finally
      (py/run-simple-string "import gc; gc.collect()"))))

;; Release large Python objects explicitly
(defmacro with-python-resources
  "Evaluates `body` with `bindings` established as in `let`, then emits one
  `(py/del sym)` call per binding in a `finally` clause.

  `bindings` is a vector of symbol/expression pairs."
  [bindings & body]
  ;; NOTE(review): confirm py/del exists in the libpython-clj2 version in use.
  `(let ~bindings
     (try
       ~@body
       (finally
         ;; The local is called `sym` so it does not shadow clojure.core/name.
         ~@(for [[sym _] (partition 2 bindings)]
             `(py/del ~sym))))))

;; Example usage
;; (assumes the `torch` alias from the PyTorch section is already loaded)
(with-python-resources [large-tensor (torch/randn [10000 10000])
                        result (torch/matmul large-tensor large-tensor)]
  (py. result mean))
Zero-Copy Data Transfer
;; Zero-copy between NumPy and dtype-next when possible.
;; Each form is on its own line so the `;;` comments no longer swallow the
;; code that follows them.
(require '[tech.v3.datatype.ffi :as ffi])

;; NumPy arrays backed by native memory can be shared
;; (assumes the `np`, `py` and `dtype` aliases from the earlier sections)
(def np-array (np/zeros [1000000] :dtype "float64"))

;; Access the underlying buffer without copying
;; NOTE(review): confirm that py/->jvm accepts an :as-buffer option and that
;; the result shares memory with the NumPy array.
(def native-buffer (py/->jvm np-array :as-buffer))

;; Modify in-place from Clojure
(dtype/set-value! native-buffer 0 42.0)

;; Changes visible in Python
(np/sum np-array) ;; Reflects the modification
Async and Parallel Processing
(require '[clojure.core.async :as async])

;; Python GIL considerations - use separate processes for true parallelism
(require-python '[multiprocessing :as mp])
(require-python '[concurrent.futures :as futures])

;; For I/O-bound Python operations, use ThreadPoolExecutor
(defn parallel-fetch
  "Submits `fetch-url` for every entry of `urls` to a 10-worker
  ThreadPoolExecutor and returns the results in completion order.

  `fetch-url` is not defined in this snippet and must be supplied elsewhere.
  The executor is shut down even when a task throws."
  [urls]
  (let [executor (futures/ThreadPoolExecutor :max_workers 10)]
    (try
      ;; mapv submits every task eagerly. The futures are kept in a plain
      ;; vector because the future->url lookup was never read.
      (let [pending (mapv (fn [url] (py. executor submit fetch-url url)) urls)]
        ;; doall forces every result before the executor is shut down.
        (doall (map (fn [fut] (py. fut result))
                    (futures/as_completed pending))))
      (finally
        (py. executor shutdown)))))

;; For CPU-bound work, leverage Clojure's parallelism with smaller Python calls
(defn parallel-inference
  "Splits `items` into batches of up to 32, calls model.predict on each batch
  through `pmap`, and concatenates the per-batch results into one lazy
  sequence."
  [items model]
  (->> items
       (partition-all 32) ;; Batch for efficiency
       (pmap (fn [batch]
               (py. model predict (py/->py-list batch))))
       (mapcat identity)))
Caching and Memoization
;; Cache expensive Python model loading
(def model-cache (atom {}))

(defn get-or-load-model
  "Returns the model cached under `model-name`. On a miss, loads it with
  AutoModel.from_pretrained, stores it in `model-cache` and returns it.

  Relies on the `hf` alias from the Hugging Face section."
  [model-name]
  (or (get @model-cache model-name)
      ;; from_pretrained is a class method, so it is called on the class
      ;; object; hf/AutoModel.from_pretrained resolves to no var.
      (let [model (py. hf/AutoModel from_pretrained model-name)]
        (swap! model-cache assoc model-name model)
        model)))

;; Memoize tokenization
(def memoized-tokenize
  ;; Results are cached per [tokenizer text] argument pair.
  (memoize
   (fn [tokenizer text]
     (py/->jvm (py. tokenizer __call__ text :return_tensors "pt")))))
Best Practices
Project Setup
Add to deps.edn:
;; Dependency map for the examples in this document.
;; libpython-clj: the Clojure/Python bridge used by every example.
{:deps {clj-python/libpython-clj {:mvn/version "2.025"}
;; tech.ml.dataset: used by the tech.ml.dataset integration examples.
techascent/tech.ml.dataset {:mvn/version "7.000"}
;; NOTE(review): none of the requires in the examples references this
;; artifact directly; confirm it is needed.
org.bytedeco/numpy {:mvn/version "1.26.0-1.5.10"}}}
Python Environment Management
;; Specify Python executable at startup
(require '[libpython-clj2.python :as py])
(require '[clojure.java.shell :as shell])

(py/initialize! :python-executable "/path/to/venv/bin/python"
                :library-path "/path/to/libpython3.11.so")

;; Or use environment variable
;; LIBPYTHON_CLJ_PYTHON_EXECUTABLE=/path/to/python
;; NOTE(review): confirm this variable name against the libpython-clj docs.

;; Install Python packages programmatically.
;; pip has no supported in-process API (pip.main was removed in pip 10), so
;; pip is run as a subprocess of the same interpreter that was configured above.
(let [{:keys [exit err]} (shell/sh "/path/to/venv/bin/python" "-m" "pip"
                                   "install" "torch" "transformers" "langchain")]
  (when-not (zero? exit)
    (throw (ex-info "pip install failed" {:exit exit :err err}))))
Error Handling
(defmacro with-python-error-handling
  "Evaluates `body`. Any Exception is rethrown as an ex-info with the message
  \"Python error\", carrying :python-error and :clojure-error in its data and
  the original exception as its cause."
  [& body]
  ;; NOTE(review): confirm py/python-exception-message exists in the
  ;; libpython-clj2 version in use.
  `(try
     ~@body
     (catch Exception e#
       (throw (ex-info "Python error"
                       {:python-error (py/python-exception-message e#)
                        :clojure-error (.getMessage e#)}
                       ;; Keep the original exception as the cause so its
                       ;; stack trace is not lost.
                       e#)))))

;; Usage
;; Wrapped in `comment` because `model` and `invalid-input` are placeholders
;; that are not defined in this snippet.
(comment
  (with-python-error-handling
    (py. model forward invalid-input)))