diff --git a/deps.edn b/deps.edn index 32b0f18..896586e 100644 --- a/deps.edn +++ b/deps.edn @@ -1,6 +1,6 @@ {:paths ["src" "test"] :deps {org.clojure/clojure {:mvn/version "1.12.0"} - io.github.radarroark/xitdb {:mvn/version "0.16.0"}} + io.github.radarroark/xitdb {:mvn/version "0.20.0"}} :aliases {:test diff --git a/src/xitdb/array_list.clj b/src/xitdb/array_list.clj index eb4e9da..6d65d48 100644 --- a/src/xitdb/array_list.clj +++ b/src/xitdb/array_list.clj @@ -132,7 +132,6 @@ (.count wal)) (cons [this o] - ;;TODO: Figure out if it is correct to append to the end (operations/array-list-append-value! wal (common/unwrap o)) this) diff --git a/src/xitdb/db.clj b/src/xitdb/db.clj index 0b624bd..a84f141 100644 --- a/src/xitdb/db.clj +++ b/src/xitdb/db.clj @@ -1,5 +1,6 @@ (ns xitdb.db (:require + [xitdb.common :as common] [xitdb.util.conversion :as conversion] [xitdb.xitdb-types :as xtypes]) (:import @@ -47,6 +48,15 @@ hasher (Hasher. (MessageDigest/getInstance "SHA-1"))] (Database. core hasher))) +(defn v->slot! + "Converts a value to a slot which can be written to a cursor. + For XITDB* types (which support ISlot), will return `-slot`, + for all other types `conversion/v->slot!`" + [^WriteCursor cursor v] + (if (satisfies? common/ISlot v) + (common/-slot v) + (conversion/v->slot! cursor v))) + (defn xitdb-swap! "Returns history index." [db f & args] @@ -58,7 +68,7 @@ (fn [^WriteCursor cursor] (let [obj (xtypes/read-from-cursor cursor true)] (let [retval (apply f (into [obj] args))] - (.write cursor (conversion/v->slot! cursor retval)))))))) + (.write cursor (v->slot! cursor retval)))))))) (defn xitdb-swap-with-lock! "Performs the 'swap!' operation while locking `db.lock`. diff --git a/src/xitdb/hash_map.clj b/src/xitdb/hash_map.clj index ac41c56..cf30a84 100644 --- a/src/xitdb/hash_map.clj +++ b/src/xitdb/hash_map.clj @@ -1,10 +1,11 @@ (ns xitdb.hash-map (:require [xitdb.common :as common] - [xitdb.util.conversion :as conversion] [xitdb.util.operations :as operations]) (:import - [io.github.radarroark.xitdb ReadCursor ReadHashMap WriteCursor WriteHashMap])) + [io.github.radarroark.xitdb + ReadCountedHashMap ReadCursor ReadHashMap + WriteCountedHashMap WriteCursor WriteHashMap])) (defn map-seq [rhm] @@ -18,14 +19,14 @@ (.valAt this key nil)) (valAt [this key not-found] - (let [cursor (.getCursor rhm (conversion/db-key key))] + (let [cursor (operations/map-read-cursor rhm key)] (if (nil? cursor) not-found (common/-read-from-cursor cursor)))) clojure.lang.Associative (containsKey [this key] - (not (nil? (.getCursor rhm (conversion/db-key key))))) + (operations/map-contains-key? rhm key)) (entryAt [this key] (let [v (.valAt this key nil)] @@ -185,4 +186,11 @@ (defn xhash-map [^ReadCursor read-cursor] (->XITDBHashMap (ReadHashMap. read-cursor))) +(defn xwrite-hash-map-counted [^WriteCursor write-cursor] + (->XITDBWriteHashMap (WriteCountedHashMap. write-cursor))) + +(defn xhash-map-counted [^ReadCursor read-cursor] + (->XITDBHashMap (ReadCountedHashMap. read-cursor))) + + diff --git a/src/xitdb/hash_set.clj b/src/xitdb/hash_set.clj index e9cc51c..6d2da8b 100644 --- a/src/xitdb/hash_set.clj +++ b/src/xitdb/hash_set.clj @@ -4,20 +4,22 @@ [xitdb.util.conversion :as conversion] [xitdb.util.operations :as operations]) (:import - [io.github.radarroark.xitdb ReadHashMap WriteCursor WriteHashMap])) + [io.github.radarroark.xitdb + ReadCountedHashSet ReadCursor ReadHashSet + WriteCountedHashSet WriteCursor WriteHashSet])) (defn set-seq [rhm] "The cursors used must implement the IReadFromCursor protocol." - (map val (operations/map-seq rhm #(common/-read-from-cursor %)))) + (operations/set-seq rhm common/-read-from-cursor)) -(deftype XITDBHashSet [^ReadHashMap rhm] +(deftype XITDBHashSet [^ReadHashSet rhs] clojure.lang.IPersistentSet (disjoin [_ k] (throw (UnsupportedOperationException. "XITDBHashSet is read-only"))) (contains [this k] - (not (nil? (.getCursor rhm (conversion/db-key (if (nil? k) 0 (.hashCode k))))))) + (operations/set-contains? rhs k)) (get [this k] (when (.contains this k) @@ -36,11 +38,11 @@ (every? #(.contains this %) other))) (count [_] - (operations/map-item-count rhm)) + (operations/set-item-count rhs)) clojure.lang.Seqable (seq [_] - (set-seq rhm)) + (set-seq rhs)) clojure.lang.ILookup (valAt [this k] @@ -68,7 +70,7 @@ common/IUnwrap (-unwrap [_] - rhm) + rhs) Object (toString [this] @@ -84,26 +86,26 @@ (into #{} (map common/materialize (seq this))))) ;; Writable version of the set -(deftype XITDBWriteHashSet [^WriteHashMap whm] +(deftype XITDBWriteHashSet [^WriteHashSet whs] clojure.lang.IPersistentSet - (disjoin [this k] - (operations/map-dissoc-key! whm (.hashCode k)) + (disjoin [this v] + (operations/set-disj-value! whs (common/unwrap v)) this) - (contains [this k] - (operations/map-contains-key? whm (.hashCode k))) + (contains [this v] + (operations/set-contains? whs (common/unwrap v))) (get [this k] - (when (.contains this k) + (when (.contains this (common/unwrap k)) k)) clojure.lang.IPersistentCollection (cons [this o] - (operations/set-assoc-value! whm (common/unwrap o)) + (operations/set-assoc-value! whs (common/unwrap o)) this) (empty [this] - (operations/set-empty! whm) + (operations/set-empty! whs) this) (equiv [this other] @@ -112,11 +114,11 @@ (every? #(.contains this %) other))) (count [_] - (operations/map-item-count whm)) + (operations/set-item-count whs)) clojure.lang.Seqable (seq [_] - (set-seq whm)) + (set-seq whs)) clojure.lang.ILookup (valAt [this k] @@ -129,11 +131,11 @@ common/ISlot (-slot [_] - (-> whm .cursor .slot)) + (-> whs .cursor .slot)) common/IUnwrap (-unwrap [_] - whm) + whs) Object (toString [_] @@ -141,8 +143,13 @@ ;; Constructor functions (defn xwrite-hash-set [^WriteCursor write-cursor] - (let [whm (operations/init-hash-set! write-cursor)] - (->XITDBWriteHashSet whm))) + (->XITDBWriteHashSet (WriteHashSet. write-cursor))) + +(defn xhash-set [^ReadCursor read-cursor] + (->XITDBHashSet (ReadHashSet. read-cursor))) + +(defn xwrite-hash-set-counted [^WriteCursor write-cursor] + (->XITDBWriteHashSet (WriteCountedHashSet. write-cursor))) -(defn xhash-set [^ReadHashMap read-cursor] - (->XITDBHashSet (ReadHashMap. read-cursor))) +(defn xhash-set-counted [^ReadCursor cursor] + (->XITDBHashSet (ReadCountedHashSet. cursor))) \ No newline at end of file diff --git a/src/xitdb/util/conversion.clj b/src/xitdb/util/conversion.clj index 371163d..96973e7 100644 --- a/src/xitdb/util/conversion.clj +++ b/src/xitdb/util/conversion.clj @@ -2,7 +2,12 @@ (:require [xitdb.util.validation :as validation]) (:import - [io.github.radarroark.xitdb Database$Float Database$Bytes Database$Int Database$Uint ReadArrayList ReadCursor ReadHashMap ReadLinkedArrayList Slot WriteArrayList WriteCursor WriteHashMap Tag WriteLinkedArrayList])) + [io.github.radarroark.xitdb + Database Database$Bytes Database$Float Database$Int + ReadArrayList ReadCountedHashSet ReadCursor ReadHashMap ReadCountedHashMap + ReadHashSet Slot Tag WriteArrayList WriteCountedHashSet WriteCursor WriteCountedHashMap + WriteHashMap WriteHashSet WriteLinkedArrayList] + [java.nio ByteBuffer])) (defn xit-tag->keyword "Converts a XitDB Tag enum to a corresponding Clojure keyword." @@ -26,7 +31,7 @@ {:keyword "kw" :boolean "bl" :key-integer "ki" - :nil "nl" + :nil "nl" ;; TODO: Could use Tag/NONE instead :inst "in" :date "da"}) @@ -46,6 +51,18 @@ (name key)) key)) +(defn db-key-hash + "Returns a byte array representing the stable hash digest of (Clojure) value `v`. + Uses the MessageDigest from the database." + ^bytes [^Database jdb v] + (if (nil? v) + (byte-array (-> jdb .-header .hashSize)) + (let [hash-code (hash v) + buffer (ByteBuffer/allocate Integer/BYTES) + _ (.putInt buffer hash-code) + bytes (.array buffer)] + (.digest (.md jdb) bytes)))) + (defn ^Slot primitive-for "Converts a Clojure primitive value to its corresponding XitDB representation. Handles strings, keywords, integers, booleans, and floats. @@ -105,13 +122,30 @@ (instance? WriteHashMap v) (-> ^WriteHashMap v .cursor .slot) - ;;TODO: Confirm that it is correct to return the Read slots (instance? ReadHashMap v) (-> ^ReadHashMap v .cursor .slot) + (instance? ReadCountedHashMap v) + (-> ^ReadCountedHashMap v .cursor .slot) + + (instance? WriteCountedHashMap v) + (-> ^WriteCountedHashMap v .cursor .slot) + (instance? ReadArrayList v) (-> ^ReadArrayList v .cursor .slot) + (instance? ReadHashSet v) + (-> ^ReadHashSet v .cursor .slot) + + (instance? ReadCountedHashSet v) + (-> ^ReadCountedHashSet v .cursor .slot) + + (instance? WriteHashSet v) + (-> ^WriteHashSet v .cursor .slot) + + (instance? WriteCountedHashSet v) + (-> ^WriteCountedHashSet v .cursor .slot) + (map? v) (do (.write cursor nil) @@ -141,14 +175,14 @@ [k] (cond (integer? k) - (database-bytes (str k) "ki") ;integer keys are stored as strings with 'ki' format tag + (database-bytes (str k) "ki") ;integer keys are stored as strings with 'ki' format tag :else (primitive-for k))) (defn read-bytes-with-format-tag [^ReadCursor cursor] (let [bytes-obj (.readBytesObject cursor nil) - str (String. (.value bytes-obj)) - fmt-tag (some-> bytes-obj .formatTag String.)] + str (String. (.value bytes-obj)) + fmt-tag (some-> bytes-obj .formatTag String.)] (cond (= fmt-tag (fmt-tag-value :keyword)) @@ -228,34 +262,27 @@ (.append write-list (primitive-for v)))) (.-cursor write-list))) -;; Forward declarations for mutual dependencies -(declare map-assoc-value!) -(declare init-hash-set!) -(declare set-assoc-value!) - (defn ^WriteCursor map->WriteHashMapCursor! "Writes a Clojure map to a XitDB WriteHashMap. Returns the cursor of the created WriteHashMap." [^WriteCursor cursor m] - (let [whm (WriteHashMap. cursor)] + (let [whm (WriteCountedHashMap. cursor)] (doseq [[k v] m] - (let [cursor (.putCursor whm (db-key k))] + (let [hash-value (db-key-hash (-> cursor .db) k) + key-cursor (.putKeyCursor whm hash-value) + cursor (.putCursor whm hash-value)] + (.writeIfEmpty key-cursor (v->slot! key-cursor k)) (.write cursor (v->slot! cursor v)))) (.-cursor whm))) (defn ^WriteCursor set->WriteCursor! - "Creates a hash-map and associates the internal key :is-set? to 1. - Map is keyed by the .hashCode of the value, valued by the value :)" + "Writes a Clojure set `s` to a XitDB WriteHashSet. + Returns the cursor of the created WriteHashSet." [^WriteCursor cursor s] - (let [whm (WriteHashMap. cursor) - ;; Mark as set - is-set-key (db-key :%xitdb_set)] - (-> whm - (.putCursor is-set-key) - (.write (primitive-for 1))) - ;; Add values + (let [whm (WriteCountedHashSet. cursor) + db (-> cursor .db)] (doseq [v s] - (let [hash-code (if v (.hashCode v) 0) - cursor (.putCursor whm (db-key hash-code))] - (.write cursor (v->slot! cursor v)))) + (let [hash-code (db-key-hash db v) + cursor (.putCursor whm hash-code)] + (.writeIfEmpty cursor (v->slot! cursor v)))) (.-cursor whm))) \ No newline at end of file diff --git a/src/xitdb/util/operations.clj b/src/xitdb/util/operations.clj index 62206bd..8b83086 100644 --- a/src/xitdb/util/operations.clj +++ b/src/xitdb/util/operations.clj @@ -3,24 +3,7 @@ [xitdb.util.conversion :as conversion] [xitdb.util.validation :as validation]) (:import - [io.github.radarroark.xitdb ReadArrayList ReadHashMap ReadLinkedArrayList Tag WriteArrayList WriteCursor WriteHashMap WriteLinkedArrayList])) - -(def internal-keys - "Map of logical internal key names to their actual storage keys in XitDB. - These keys are used internally by the system and should not be exposed to users." - {:count :%xitdb__count - :is-set? :%xitdb_set}) - -(def hidden-keys - "Set of keys that are used internally and should be hidden from user operations. - Operations like seq, reduce, and count will skip these keys." - (set (vals internal-keys))) - -(def ^:dynamic *enable-map-fast-count?* - "When true, maps store their item count in an internal key for O(1) count operations. - When false, count operations require iteration over all entries (O(n)). - Default is false to minimize storage overhead." - false) + [io.github.radarroark.xitdb ReadArrayList ReadCountedHashMap ReadCountedHashSet ReadHashMap ReadHashSet ReadLinkedArrayList Tag WriteArrayList WriteCursor WriteHashMap WriteHashSet WriteLinkedArrayList])) ;; ============================================================================ ;; Array List Operations @@ -99,24 +82,6 @@ ;; Map Operations ;; ============================================================================ -(defn- update-map-item-count! - "Update the internal key `:count` by applying `f` to the current value. - If the key `:count` does not exist, it is created." - [^WriteHashMap whm f] - (when *enable-map-fast-count?* - (let [count-cursor (.putCursor whm (conversion/db-key (internal-keys :count))) - value (try - (.readInt count-cursor) - (catch Exception _ 0)) - new-value (conversion/primitive-for (f (or value 0)))] - (.write count-cursor new-value)))) - -(defn- map-item-count-stored - "Returns the value of the internal key `:count`." - [^ReadHashMap rhm] - (let [count-cursor (.getCursor rhm (conversion/db-key (internal-keys :count)))] - (.readInt count-cursor))) - (defn map-assoc-value! "Associates a key-value pair in a WriteHashMap. @@ -130,26 +95,20 @@ Throws IllegalArgumentException if attempting to associate an internal key. Updates the internal count if fast counting is enabled." [^WriteHashMap whm k v] - (when (contains? hidden-keys k) - (throw (IllegalArgumentException. (str "Cannot assoc key. " k ". It is reserved for internal use.")))) - - (let [cursor (.putCursor whm (conversion/db-key k)) - new? (= (-> cursor .slot .tag) Tag/NONE)] - (.write cursor (conversion/v->slot! cursor v)) - (when new? - (update-map-item-count! whm inc)) - whm)) + (let [key-hash (conversion/db-key-hash (-> whm .cursor .db) k) + key-cursor (.putKeyCursor whm key-hash) + cursor (.putCursor whm key-hash)] + (.writeIfEmpty key-cursor (conversion/v->slot! key-cursor k)) + (.write cursor (conversion/v->slot! cursor v)))) (defn map-dissoc-key! "Removes a key-value pair from a WriteHashMap. Throws IllegalArgumentException if attempting to remove an internal key. Updates the internal count if fast counting is enabled." [^WriteHashMap whm k] - (when (contains? hidden-keys k) - (throw (IllegalArgumentException. (str "Cannot dissoc key. " k ". It is reserved for internal use.")))) - - (when (.remove whm (conversion/db-key k)) - (update-map-item-count! whm dec))) + (let [key-hash (conversion/db-key-hash (-> whm .cursor .db) k)] + (.remove whm key-hash)) + whm) (defn ^WriteHashMap map-empty! "Empties a WriteHashMap by replacing its contents with an empty map. @@ -162,13 +121,14 @@ (defn map-contains-key? "Checks if a WriteHashMap contains the specified key. Returns true if the key exists, false otherwise." - [^WriteHashMap whm key] - (not (nil? (.getCursor whm (conversion/db-key key))))) + [^ReadHashMap whm key] + (let [key-hash (conversion/db-key-hash (-> whm .cursor .db) key)] + (not (nil? (.getKeyCursor whm key-hash))))) (defn map-item-count-iterated "Returns the number of keys in the map by iterating. The count includes internal keys if any." - [^ReadHashMap rhm] + [^Iterable rhm] (let [it (.iterator rhm)] (loop [cnt 0] (if (.hasNext it) @@ -180,76 +140,71 @@ (defn map-item-count "Returns the number of key/vals in the map." [^ReadHashMap rhm] - (if *enable-map-fast-count?* - (map-item-count-stored rhm) + (if (instance? ReadCountedHashMap rhm) + (.count ^ReadCountedHashMap rhm) (map-item-count-iterated rhm))) (defn map-read-cursor "Gets a read cursor for the specified key in a ReadHashMap. Returns the cursor if the key exists, nil otherwise." [^ReadHashMap rhm key] - (.getCursor rhm (conversion/db-key key))) + (let [key-hash (conversion/db-key-hash (-> rhm .cursor .db) key)] + (.getCursor rhm key-hash))) + (defn map-write-cursor "Gets a write cursor for the specified key in a WriteHashMap. Creates the key if it doesn't exist." [^WriteHashMap whm key] - (.putCursor whm (conversion/db-key key))) + (let [key-hash (conversion/db-key-hash (-> whm .cursor .db) key)] + (.putCursor whm key-hash))) ;; ============================================================================ ;; Set Operations ;; ============================================================================ -(defn set-assoc-value! - "Adds a value to a set (implemented as a WriteHashMap). - Uses the value's hashCode as the key and the value itself as the value. - Only adds the value if it doesn't already exist (based on hashCode). - Returns the modified WriteHashMap." - [^WriteHashMap whm v] - (let [hash-code (if v (.hashCode v) 0)] - (let [cursor (.putCursor whm (conversion/db-key hash-code)) - new? (= (-> cursor .slot .tag) Tag/NONE)] - (when new? - ;; Only write value when the hashCode key doesn't exist - (.write cursor (conversion/v->slot! cursor v)) - (update-map-item-count! whm inc)) - whm))) - -(defn ^WriteHashMap mark-as-set! - "Marks a WriteHashMap as being a set by adding an internal marker. - This allows the system to distinguish between maps and sets. - Returns the modified WriteHashMap." - [^WriteHashMap whm] - (let [is-set-key (conversion/db-key (internal-keys :is-set?))] - (-> whm - (.putCursor is-set-key) - (.write (conversion/primitive-for 1))) - whm)) +(defn set-item-count + "Returns the number of values in the set." + [^ReadHashSet rhs] + (if (instance? ReadCountedHashSet rhs) + (.count ^ReadCountedHashSet rhs) + (map-item-count-iterated rhs))) -(defn ^WriteHashMap init-hash-set! - "Initializes a new WriteHashMap as a set. - Creates a WriteHashMap and marks it as a set using the internal marker. - Returns the newly created WriteHashMap configured as a set." - [^WriteCursor cursor] - (let [whm (WriteHashMap. cursor)] - (mark-as-set! whm) - whm)) +(defn set-assoc-value! + "Adds a value to a set." + [^WriteHashSet whs v] + (let [hash-code (conversion/db-key-hash (-> whs .cursor .db) v) + cursor (.putCursor whs hash-code)] + (.writeIfEmpty cursor (conversion/v->slot! cursor v)) + whs)) + +(defn set-disj-value! + "Removes a value from a set" + [^WriteHashSet whs v] + (let [hash-code (conversion/db-key-hash (-> whs .cursor .db) v)] + (.remove whs hash-code) + whs)) + +(defn set-contains? + "Returns true if `v` is in the set." + [rhs v] + (let [hash-code (conversion/db-key-hash (-> rhs .-cursor .-db) v) + cursor (.getCursor rhs hash-code)] + (some? cursor))) (defn ^WriteHashMap set-empty! - "Empties a set (WriteHashMap) and re-initializes it as an empty set. - Clears all values and re-adds the internal set marker. - Returns the emptied and re-initialized WriteHashMap." - [^WriteHashMap whm] - (map-empty! whm) - (init-hash-set! (.cursor whm)) - whm) + "Replaces the whs value with an empty set." + [^WriteHashSet whs] + (let [empty-set (conversion/v->slot! (.cursor whs) #{})] + (.write ^WriteCursor (.cursor whs) empty-set)) + whs) ;; ============================================================================ ;; Sequence Operations ;; ============================================================================ (defn map-seq - "Return a lazy seq of key-value MapEntry pairs, skipping hidden keys." + "Return a lazy seq of key-value MapEntry pairs." [^ReadHashMap rhm read-from-cursor] (let [it (.iterator rhm)] (letfn [(step [] @@ -257,11 +212,22 @@ (when (.hasNext it) (let [cursor (.next it) kv (.readKeyValuePair cursor) - k (conversion/read-bytes-with-format-tag (.-keyCursor kv))] - (if (contains? hidden-keys k) - (step) - (let [v (read-from-cursor (.-valueCursor kv))] - (cons (clojure.lang.MapEntry. k v) (step))))))))] + k (read-from-cursor (.-keyCursor kv))] + (let [v (read-from-cursor (.-valueCursor kv))] + (cons (clojure.lang.MapEntry. k v) (step)))))))] + (step)))) + +(defn set-seq + "Return a lazy seq values from the set." + [rhm read-from-cursor] + (let [it (.iterator rhm)] + (letfn [(step [] + (lazy-seq + (when (.hasNext it) + (let [cursor (.next it) + kv (.readKeyValuePair cursor) + v (read-from-cursor (.-keyCursor kv))] + (cons v (step))))))] (step)))) (defn array-seq @@ -269,11 +235,11 @@ Uses the provided read-from-cursor function to convert cursors to values. Returns a lazy sequence of the array elements." [^ReadArrayList ral read-from-cursor] - (let [iter (.iterator ral) + (let [iter (.iterator ral) lazy-iter (fn lazy-iter [] (when (.hasNext iter) (let [cursor (.next iter) - value (read-from-cursor cursor)] + value (read-from-cursor cursor)] (lazy-seq (cons value (lazy-iter))))))] (lazy-iter))) @@ -282,11 +248,11 @@ Uses the provided read-from-cursor function to convert cursors to values. Returns a lazy sequence of the linked array elements." [^ReadLinkedArrayList rlal read-from-cursor] - (let [iter (.iterator rlal) + (let [iter (.iterator rlal) lazy-iter (fn lazy-iter [] (when (.hasNext iter) (let [cursor (.next iter) - value (read-from-cursor cursor)] + value (read-from-cursor cursor)] (lazy-seq (cons value (lazy-iter))))))] (lazy-iter))) @@ -296,27 +262,25 @@ (let [it (.iterator rhm)] (loop [result init] (if (.hasNext it) - (let [cursor (.next it) - kv (.readKeyValuePair cursor) - k (conversion/read-bytes-with-format-tag (.-keyCursor kv))] - (if (contains? hidden-keys k) - (recur result) - (let [v (read-from-cursor (.-valueCursor kv)) - new-result (f result k v)] - (if (reduced? new-result) - @new-result - (recur new-result))))) + (let [cursor (.next it) + kv (.readKeyValuePair cursor) + k (read-from-cursor (.-keyCursor kv)) + v (read-from-cursor (.-valueCursor kv)) + new-result (f result k v)] + (if (reduced? new-result) + @new-result + (recur new-result))) result)))) (defn array-kv-reduce "Efficiently reduces over index-value pairs in a ReadArrayList." [^ReadArrayList ral read-from-cursor f init] (let [count (.count ral)] - (loop [i 0 + (loop [i 0 result init] (if (< i count) - (let [cursor (.getCursor ral i) - v (read-from-cursor cursor) + (let [cursor (.getCursor ral i) + v (read-from-cursor cursor) new-result (f result i v)] (if (reduced? new-result) @new-result diff --git a/src/xitdb/xitdb_types.clj b/src/xitdb/xitdb_types.clj index 529570d..0d6a1f1 100644 --- a/src/xitdb/xitdb_types.clj +++ b/src/xitdb/xitdb_types.clj @@ -7,20 +7,7 @@ [xitdb.hash-set :as xhash-set] [xitdb.util.conversion :as conversion]) (:import - (io.github.radarroark.xitdb ReadCursor ReadHashMap Slot Tag WriteCursor))) - -(defn xhash-map-or-set [^ReadCursor cursor] - (let [hm (ReadHashMap. cursor)] - (if (.getCursor hm (conversion/db-key :%xitdb_set)) - (xhash-set/xhash-set cursor) - (xhash-map/xhash-map cursor)))) - -(defn x-write-map-or-set [^ReadCursor cursor] - (let [hm (ReadHashMap. cursor)] - (if (.getCursor hm (conversion/db-key :%xitdb_set)) - (xhash-set/xwrite-hash-set cursor) - (xhash-map/xwrite-hash-map cursor)))) - + (io.github.radarroark.xitdb ReadCountedHashMap ReadCursor ReadHashMap Slot Tag WriteCursor WriteHashMap))) (defn read-from-cursor [^ReadCursor cursor for-writing?] (let [value-tag (some-> cursor .slot .tag)] @@ -39,10 +26,24 @@ (.readFloat cursor) (= value-tag Tag/HASH_MAP) + (if for-writing? + (xhash-map/xwrite-hash-map cursor) + (xhash-map/xhash-map cursor)) + + (= value-tag Tag/COUNTED_HASH_MAP) + (if for-writing? + (xhash-map/xwrite-hash-map-counted cursor) + (xhash-map/xhash-map-counted cursor)) + + (= value-tag Tag/HASH_SET) + (if for-writing? + (xhash-set/xwrite-hash-set cursor) + (xhash-set/xhash-set cursor)) + (= value-tag Tag/COUNTED_HASH_SET) (if for-writing? - (x-write-map-or-set cursor) - (xhash-map-or-set cursor)) + (xhash-set/xwrite-hash-set-counted cursor) + (xhash-set/xhash-set-counted cursor)) (= value-tag Tag/ARRAY_LIST) (if for-writing? diff --git a/src/xitdb/xitdb_util.clj b/src/xitdb/xitdb_util.clj deleted file mode 100644 index 67d9def..0000000 --- a/src/xitdb/xitdb_util.clj +++ /dev/null @@ -1,503 +0,0 @@ -(ns xitdb.xitdb-util - (:import - [io.github.radarroark.xitdb Database$Float Database$Bytes Database$Int Database$Uint ReadArrayList ReadCursor ReadHashMap ReadLinkedArrayList Slot WriteArrayList WriteCursor WriteHashMap Tag WriteLinkedArrayList])) - -(defn xit-tag->keyword - "Converts a XitDB Tag enum to a corresponding Clojure keyword." - [tag] - (cond - (= tag Tag/NONE) :none - (= tag Tag/INDEX) :index - (= tag Tag/ARRAY_LIST) :array-list - (= tag Tag/LINKED_ARRAY_LIST) :linked-array-list - (= tag Tag/HASH_MAP) :hash-map - (= tag Tag/KV_PAIR) :kv-pair - (= tag Tag/BYTES) :bytes - (= tag Tag/SHORT_BYTES) :short-bytes - (= tag Tag/UINT) :uint - (= tag Tag/INT) :int - (= tag Tag/FLOAT) :float - :else :unknown)) - -;; map of logical tag -> string used as formatTag in the Bytes record. -(def fmt-tag-value - {:keyword "kw" - :boolean "bl" - :key-integer "ki" - :nil "nl" - :inst "in" - :date "da"}) - -(def true-str "#t") -(def false-str "#f") - -;; map of logical key -> key stored in the HashMap -(def internal-keys - {:count :%xitdb__count - :is-set? :%xitdb_set}) - -;; HashMap keys which are used internally and should be hidden from user -(def hidden-keys (set (vals internal-keys))) - -(declare ^WriteCursor map->WriteHashMapCursor!) -(declare ^WriteCursor coll->ArrayListCursor!) -(declare ^WriteCursor list->LinkedArrayListCursor!) -(declare ^WriteCursor set->WriteCursor!) - -(def ^:dynamic *debug?* false) - -(defn lazy-seq? [v] - (instance? clojure.lang.LazySeq v)) - -(defn vector-or-chunked? [v] - (or (vector? v) (chunked-seq? v))) - -(defn list-or-cons? [v] - (or (list? v) (instance? clojure.lang.Cons v))) - -(defn ^String keyname [key] - (if (keyword? key) - (if (namespace key) - (str (namespace key) "/" (name key)) - (name key)) - key)) - - -(defn ^Database$Bytes database-bytes - ([^String s] - (Database$Bytes. s)) - ([^String s ^String tag] - (Database$Bytes. s tag))) - - -(defn ^Slot primitive-for - "Converts a Clojure primitive value to its corresponding XitDB representation. - Handles strings, keywords, integers, booleans, and floats. - Throws an IllegalArgumentException for unsupported types." - [v] - (cond - - (lazy-seq? v) - (throw (IllegalArgumentException. "Lazy sequences can be infinite and not allowed!")) - - (string? v) - (database-bytes v) - - (keyword? v) - (database-bytes (keyname v) (fmt-tag-value :keyword)) - - (integer? v) - (Database$Int. v) - - (boolean? v) - (database-bytes (if v true-str false-str) (fmt-tag-value :boolean)) - - (double? v) - (Database$Float. v) - - (nil? v) - (database-bytes "" (fmt-tag-value :nil)) - - (instance? java.time.Instant v) - (database-bytes (str v) (fmt-tag-value :inst)) - - (instance? java.util.Date v) - (database-bytes (str (.toInstant ^java.util.Date v)) (fmt-tag-value :date)) - - :else - (throw (IllegalArgumentException. (str "Unsupported type: " (type v) v))))) - -(defn ^Slot v->slot! - "Converts a value to a XitDB slot. - Handles WriteArrayList and WriteHashMap instances directly. - Recursively processes Clojure maps and collections. - Falls back to primitive conversion for other types." - [^WriteCursor cursor v] - (cond - - (instance? WriteArrayList v) - (-> ^WriteArrayList v .cursor .slot) - - (instance? WriteLinkedArrayList v) - (-> ^WriteLinkedArrayList v .cursor .slot) - - (instance? WriteHashMap v) - (-> ^WriteHashMap v .cursor .slot) - - ;;TODO: Confirm that it is correct to return the Read slots - (instance? ReadHashMap v) - (-> ^ReadHashMap v .cursor .slot) - - (instance? ReadArrayList v) - (-> ^ReadArrayList v .cursor .slot) - - (map? v) - (do - (.write cursor nil) - (.slot (map->WriteHashMapCursor! cursor v))) - - (list-or-cons? v) - (do - (.write cursor nil) - (.slot (list->LinkedArrayListCursor! cursor v))) - - (vector-or-chunked? v) - (do - (.write cursor nil) - (.slot (coll->ArrayListCursor! cursor v))) - - (set? v) - (do - (.write cursor nil) - (.slot (set->WriteCursor! cursor v))) - - :else - (primitive-for v))) - -(defn ^WriteArrayList array-list-append-value! - "Appends a value to a WriteArrayList. - Converts the value to an appropriate XitDB representation using v->slot!." - [^WriteArrayList wal v] - (let [cursor (.appendCursor wal)] - (.write cursor (v->slot! cursor v)) - wal)) - -(defn ^WriteArrayList array-list-assoc-value! - "Associates a value at index i in a WriteArrayList. - Appends the value if the index equals the current count. - Replaces the value at the specified index otherwise. - Throws an IllegalArgumentException if the index is out of bounds." - [^WriteArrayList wal i v] - - (assert (= Tag/ARRAY_LIST (-> wal .cursor .slot .tag))) - (assert (number? i)) - - (when (> i (.count wal)) - (throw (IllegalArgumentException. "Index out of bounds. "))) - - (let [cursor (if (= i (.count wal)) - (.appendCursor wal) - (.putCursor wal i))] - (.write cursor (v->slot! cursor v))) - wal) - -(defn array-list-pop! [^WriteArrayList wal] - (when (zero? (.count wal)) - (throw (IllegalStateException. "Can't pop empty array"))) - - (.slice wal (dec (.count wal)))) - -(defn array-list-empty! [^WriteArrayList wal] - (let [^WriteCursor cursor (-> wal .cursor)] - (.write cursor (v->slot! cursor [])))) - -(defn linked-array-list-append-value! - "Appends a value to a WriteLinkedArrayList. - Converts the value to an appropriate XitDB representation using v->slot!." - [^WriteLinkedArrayList wlal v] - (let [cursor (.appendCursor wlal)] - (.write cursor (v->slot! cursor v)) - nil)) - -(defn linked-array-list-insert-value! - "Appends a value to a WriteLinkedArrayList. - Converts the value to an appropriate XitDB representation using v->slot!." - [^WriteLinkedArrayList wlal pos v] - (let [cursor (-> wlal .cursor)] - (.insert wlal pos (v->slot! cursor v))) - nil) - -(defn linked-array-list-pop! - [^WriteLinkedArrayList wlal] - (.remove wlal 0) - nil) - -(defn ^Database$Bytes db-key - "Converts k from a Clojure type to a Database$Bytes representation to be used in - cursor functions." - [k] - (cond - (integer? k) - (database-bytes (str k) "ki") ;integer keys are stored as strings with 'ki' format tag - :else - (primitive-for k))) - -;; Enable storing the count of items in the hashmap under an internal key :count -(def ^:dynamic *enable-map-fast-count?* false) - -(defn- update-map-item-count! - "Update the internal key `:count` by applying `f` to the current value. - If the key `:count` does not exist, it is created." - [^WriteHashMap whm f] - (when *enable-map-fast-count?* - (let [count-cursor (.putCursor whm (db-key (internal-keys :count))) - value (try - (.readInt count-cursor) - (catch Exception _ 0)) - new-value (primitive-for (f (or value 0)))] - (.write count-cursor new-value)))) - -(defn- map-item-count-stored - "Returns the value of the internal key `:count`." - [^ReadHashMap rhm] - (let [count-cursor (.getCursor rhm (db-key (internal-keys :count)))] - (.readInt count-cursor))) - -(defn map-assoc-value! - "Associates a key-value pair in a WriteHashMap. - Converts the key to a string and the value to an appropriate XitDB representation. - throws when trying to associate a internal key." - [^WriteHashMap whm k v] - (when (contains? hidden-keys k) - (throw (IllegalArgumentException. (str "Cannot assoc key. " k ". It is reserved for internal use.")))) - - (let [cursor (.putCursor whm (db-key k)) - new? (= (-> cursor .slot .tag) Tag/NONE)] - (.write cursor (v->slot! cursor v)) - (when new? - (update-map-item-count! whm inc)) - whm)) - -(defn map-dissoc-key! - [^WriteHashMap whm k] - (when (contains? hidden-keys k) - (throw (IllegalArgumentException. (str "Cannot dissoc key. " k ". It is reserved for internal use.")))) - - (when (.remove whm (db-key k)) - (update-map-item-count! whm dec))) - -(defn ^WriteHashMap map-empty! [^WriteHashMap whm] - (let [^WriteCursor cursor (-> whm .cursor)] - (.write cursor (v->slot! cursor {})) - whm)) - -(defn map-contains-key? [^WriteHashMap whm key] - (not (nil? (.getCursor whm (db-key key))))) - -(defn map-item-count-iterated - "Returns the number of keys in the map by iterating. - The count includes internal keys if any." - [^ReadHashMap rhm] - (let [it (.iterator rhm)] - (loop [cnt 0] - (if (.hasNext it) - (do - (.next it) - (recur (inc cnt))) - cnt)))) - -(defn map-item-count - "Returns the number of key/vals in the map." - [^ReadHashMap rhm] - (if *enable-map-fast-count?* - (map-item-count-stored rhm) - (map-item-count-iterated rhm))) - -(defn map-read-cursor [^ReadHashMap rhm key] - (.getCursor rhm (db-key key))) - -(defn map-write-cursor [^WriteHashMap whm key] - (.putCursor whm (db-key key))) - -(defn ^WriteCursor coll->ArrayListCursor! - "Converts a Clojure collection to a XitDB ArrayList cursor. - Handles nested maps and collections recursively. - Returns the cursor of the created WriteArrayList." - [^WriteCursor cursor coll] - (when *debug?* (println "Write array" (type coll))) - (let [write-array (WriteArrayList. cursor)] - (doseq [v coll] - (cond - (map? v) - (let [v-cursor (.appendCursor write-array)] - (map->WriteHashMapCursor! v-cursor v)) - - (list-or-cons? v) - (let [v-cursor (.appendCursor write-array)] - (list->LinkedArrayListCursor! v-cursor v)) - - (vector-or-chunked? v) - (let [v-cursor (.appendCursor write-array)] - (coll->ArrayListCursor! v-cursor v)) - - :else - (.append write-array (primitive-for v)))) - (.-cursor write-array))) - -(defn ^WriteCursor list->LinkedArrayListCursor! - "Converts a Clojure list or seq-like collection to a XitDB LinkedArrayList cursor. - Optimized for sequential access collections rather than random access ones." - [^WriteCursor cursor coll] - (when *debug?* (println "Write list" (type coll))) - (let [write-list (WriteLinkedArrayList. cursor)] - (doseq [v coll] - (when *debug?* (println "v=" v)) - (cond - (map? v) - (let [v-cursor (.appendCursor write-list)] - (map->WriteHashMapCursor! v-cursor v)) - - (lazy-seq? v) - (throw (IllegalArgumentException. "Lazy sequences can be infinite and not allowed !")) - - (list-or-cons? v) - (let [v-cursor (.appendCursor write-list)] - (list->LinkedArrayListCursor! v-cursor v)) - - (vector-or-chunked? v) - (let [v-cursor (.appendCursor write-list)] - (coll->ArrayListCursor! v-cursor v)) - - :else - (.append write-list (primitive-for v)))) - (.-cursor write-list))) - -;; ---------- - -(defn set-assoc-value! - [^WriteHashMap whm v] - (let [hash-code (if v (.hashCode v) 0)] - (let [cursor (.putCursor whm (db-key hash-code)) - new? (= (-> cursor .slot .tag) Tag/NONE)] - (when new? - ;; Only write value when the hashCode key doesn't exist - (.write cursor (v->slot! cursor v)) - (update-map-item-count! whm inc)) - whm))) - - - -(defn ^WriteHashMap mark-as-set! [^WriteHashMap whm] - (let [is-set-key (db-key (internal-keys :is-set?))] - (-> whm - (.putCursor is-set-key) - (.write (primitive-for 1))) - whm)) - -(defn ^WriteHashMap init-hash-set! [^WriteCursor cursor] - (let [whm (WriteHashMap. cursor)] - (mark-as-set! whm) - whm)) - -(defn ^WriteHashMap set-empty! [^WriteHashMap whm] - (map-empty! whm) - (init-hash-set! (.cursor whm)) - whm) - -(defn ^WriteCursor set->WriteCursor! - "Creates a hash-map and associates the internal key :is-set? to 1. - Map is keyed by the .hashCode of the value, valued by the value :)" - [^WriteCursor cursor s] - (let [whm (init-hash-set! cursor)] - (doseq [v s] - (set-assoc-value! whm v)) - (.-cursor whm))) - -(defn ^WriteCursor map->WriteHashMapCursor! - "Writes a Clojure map to a XitDB WriteHashMap. - Returns the cursor of the created WriteHashMap." - [^WriteCursor cursor m] - (let [whm (WriteHashMap. cursor)] - (doseq [[k v] m] - (map-assoc-value! whm k v)) - (.-cursor whm))) - -(defn read-bytes-with-format-tag [^ReadCursor cursor] - (let [bytes-obj (.readBytesObject cursor nil) - str (String. (.value bytes-obj)) - fmt-tag (some-> bytes-obj .formatTag String.)] - (cond - - (= fmt-tag (fmt-tag-value :keyword)) - (keyword str) - - (= fmt-tag (fmt-tag-value :boolean)) - (= str true-str) - - (= fmt-tag (fmt-tag-value :key-integer)) - (Integer/parseInt str) - - (= fmt-tag (fmt-tag-value :inst)) - (java.time.Instant/parse str) - - - (= fmt-tag (fmt-tag-value :date)) - (java.util.Date/from - (java.time.Instant/parse str)) - - - (= fmt-tag (fmt-tag-value :nil)) - nil - - :else - str))) - -(defn map-seq - "Return a lazy seq of key-value MapEntry pairs, skipping hidden keys." - [^ReadHashMap rhm read-from-cursor] - (let [it (.iterator rhm)] - (letfn [(step [] - (lazy-seq - (when (.hasNext it) - (let [cursor (.next it) - kv (.readKeyValuePair cursor) - k (read-bytes-with-format-tag (.-keyCursor kv))] - (if (contains? hidden-keys k) - (step) - (let [v (read-from-cursor (.-valueCursor kv))] - (cons (clojure.lang.MapEntry. k v) (step))))))))] - (step)))) - -(defn array-seq [^ReadArrayList ral read-from-cursor] - (let [iter (.iterator ral) - lazy-iter (fn lazy-iter [] - (when (.hasNext iter) - (let [cursor (.next iter) - value (read-from-cursor cursor)] - (lazy-seq (cons value (lazy-iter))))))] - (lazy-iter))) - -;;Same as above, but different type hints -(defn linked-array-seq [^ReadLinkedArrayList rlal read-from-cursor] - (let [iter (.iterator rlal) - lazy-iter (fn lazy-iter [] - (when (.hasNext iter) - (let [cursor (.next iter) - value (read-from-cursor cursor)] - (lazy-seq (cons value (lazy-iter))))))] - (lazy-iter))) - -(defn map-kv-reduce - "Efficiently reduces over key-value pairs in a ReadHashMap, skipping hidden keys." - [^ReadHashMap rhm read-from-cursor f init] - (let [it (.iterator rhm)] - (loop [result init] - (if (.hasNext it) - (let [cursor (.next it) - kv (.readKeyValuePair cursor) - k (read-bytes-with-format-tag (.-keyCursor kv))] - (if (contains? hidden-keys k) - (recur result) - (let [v (read-from-cursor (.-valueCursor kv)) - new-result (f result k v)] - (if (reduced? new-result) - @new-result - (recur new-result))))) - result)))) - -(defn array-kv-reduce - "Efficiently reduces over index-value pairs in a ReadArrayList." - [^ReadArrayList ral read-from-cursor f init] - (let [count (.count ral)] - (loop [i 0 - result init] - (if (< i count) - (let [cursor (.getCursor ral i) - v (read-from-cursor cursor) - new-result (f result i v)] - (if (reduced? new-result) - @new-result - (recur (inc i) new-result))) - result)))) - - diff --git a/test/xitdb/database_test.clj b/test/xitdb/database_test.clj index 7bc787a..544b99c 100644 --- a/test/xitdb/database_test.clj +++ b/test/xitdb/database_test.clj @@ -371,9 +371,6 @@ (is (= 0 (count @db))) (is (empty? @db)) - (is (thrown? IllegalArgumentException (swap! db assoc :%xitdb__count -3))) - (is (thrown? IllegalArgumentException (swap! db dissoc :%xitdb__count))) - (is (tu/db-equal-to-atom? db)))) (deftest NilTest @@ -447,3 +444,4 @@ + diff --git a/test/xitdb/map_test.clj b/test/xitdb/map_test.clj new file mode 100644 index 0000000..976690f --- /dev/null +++ b/test/xitdb/map_test.clj @@ -0,0 +1,22 @@ +(ns xitdb.map-test + (:require + [clojure.test :refer :all] + [xitdb.test-utils :as tu :refer [with-db]])) + +(deftest map-with-complex-keys + (with-db [db (tu/test-db)] + (testing "Composite values as keys" + (reset! db {:foo {{:bar :baz} 42}}) + (is (= {:foo {{:bar :baz} 42}} + (tu/materialize @db))) + + (reset! db {:foo {[1 :bar] 31 + [2 :baz] 42}}) + (is (= {:foo {[1 :bar] 31 + [2 :baz] 42}} + (tu/materialize @db))) + + (swap! db update :foo dissoc [2 :baz]) + + (is (= {:foo {[1 :bar] 31}} + (tu/materialize @db)))))) \ No newline at end of file diff --git a/test/xitdb/set_test.clj b/test/xitdb/set_test.clj index 59c12fd..37327fa 100644 --- a/test/xitdb/set_test.clj +++ b/test/xitdb/set_test.clj @@ -10,7 +10,6 @@ (testing "Set works" (with-db [db (tu/test-db)] (reset! db #{1 2 3 4 5}) - (swap! db conj 6) (swap! db disj 2 3) @@ -88,5 +87,7 @@ (let [sweets (:sweets @db)] (is (true? (contains? sweets nil))))))) - - +(deftest HashCodeTest + (with-db [db (tu/test-db)] + (reset! db #{:one 1 []}) + (is (= #{:one 1 []} @db))))