freelist.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. package bbolt
  2. import (
  3. "fmt"
  4. "sort"
  5. "unsafe"
  6. )
  7. // txPending holds a list of pgids and corresponding allocation txns
  8. // that are pending to be freed.
  9. type txPending struct {
  10. ids []pgid
  11. alloctx []txid // txids allocating the ids
  12. lastReleaseBegin txid // beginning txid of last matching releaseRange
  13. }
  14. // freelist represents a list of all pages that are available for allocation.
  15. // It also tracks pages that have been freed but are still in use by open transactions.
  16. type freelist struct {
  17. ids []pgid // all free and available free page ids.
  18. allocs map[pgid]txid // mapping of txid that allocated a pgid.
  19. pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
  20. cache map[pgid]bool // fast lookup of all free and pending page ids.
  21. }
  22. // newFreelist returns an empty, initialized freelist.
  23. func newFreelist() *freelist {
  24. return &freelist{
  25. allocs: make(map[pgid]txid),
  26. pending: make(map[txid]*txPending),
  27. cache: make(map[pgid]bool),
  28. }
  29. }
  30. // size returns the size of the page after serialization.
  31. func (f *freelist) size() int {
  32. n := f.count()
  33. if n >= 0xFFFF {
  34. // The first element will be used to store the count. See freelist.write.
  35. n++
  36. }
  37. return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
  38. }
  39. // count returns count of pages on the freelist
  40. func (f *freelist) count() int {
  41. return f.free_count() + f.pending_count()
  42. }
  43. // free_count returns count of free pages
  44. func (f *freelist) free_count() int {
  45. return len(f.ids)
  46. }
  47. // pending_count returns count of pending pages
  48. func (f *freelist) pending_count() int {
  49. var count int
  50. for _, txp := range f.pending {
  51. count += len(txp.ids)
  52. }
  53. return count
  54. }
  55. // copyall copies into dst a list of all free ids and all pending ids in one sorted list.
  56. // f.count returns the minimum length required for dst.
  57. func (f *freelist) copyall(dst []pgid) {
  58. m := make(pgids, 0, f.pending_count())
  59. for _, txp := range f.pending {
  60. m = append(m, txp.ids...)
  61. }
  62. sort.Sort(m)
  63. mergepgids(dst, f.ids, m)
  64. }
  65. // allocate returns the starting page id of a contiguous list of pages of a given size.
  66. // If a contiguous block cannot be found then 0 is returned.
  67. func (f *freelist) allocate(txid txid, n int) pgid {
  68. if len(f.ids) == 0 {
  69. return 0
  70. }
  71. var initial, previd pgid
  72. for i, id := range f.ids {
  73. if id <= 1 {
  74. panic(fmt.Sprintf("invalid page allocation: %d", id))
  75. }
  76. // Reset initial page if this is not contiguous.
  77. if previd == 0 || id-previd != 1 {
  78. initial = id
  79. }
  80. // If we found a contiguous block then remove it and return it.
  81. if (id-initial)+1 == pgid(n) {
  82. // If we're allocating off the beginning then take the fast path
  83. // and just adjust the existing slice. This will use extra memory
  84. // temporarily but the append() in free() will realloc the slice
  85. // as is necessary.
  86. if (i + 1) == n {
  87. f.ids = f.ids[i+1:]
  88. } else {
  89. copy(f.ids[i-n+1:], f.ids[i+1:])
  90. f.ids = f.ids[:len(f.ids)-n]
  91. }
  92. // Remove from the free cache.
  93. for i := pgid(0); i < pgid(n); i++ {
  94. delete(f.cache, initial+i)
  95. }
  96. f.allocs[initial] = txid
  97. return initial
  98. }
  99. previd = id
  100. }
  101. return 0
  102. }
  103. // free releases a page and its overflow for a given transaction id.
  104. // If the page is already free then a panic will occur.
  105. func (f *freelist) free(txid txid, p *page) {
  106. if p.id <= 1 {
  107. panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
  108. }
  109. // Free page and all its overflow pages.
  110. txp := f.pending[txid]
  111. if txp == nil {
  112. txp = &txPending{}
  113. f.pending[txid] = txp
  114. }
  115. allocTxid, ok := f.allocs[p.id]
  116. if ok {
  117. delete(f.allocs, p.id)
  118. } else if (p.flags & freelistPageFlag) != 0 {
  119. // Freelist is always allocated by prior tx.
  120. allocTxid = txid - 1
  121. }
  122. for id := p.id; id <= p.id+pgid(p.overflow); id++ {
  123. // Verify that page is not already free.
  124. if f.cache[id] {
  125. panic(fmt.Sprintf("page %d already freed", id))
  126. }
  127. // Add to the freelist and cache.
  128. txp.ids = append(txp.ids, id)
  129. txp.alloctx = append(txp.alloctx, allocTxid)
  130. f.cache[id] = true
  131. }
  132. }
  133. // release moves all page ids for a transaction id (or older) to the freelist.
  134. func (f *freelist) release(txid txid) {
  135. m := make(pgids, 0)
  136. for tid, txp := range f.pending {
  137. if tid <= txid {
  138. // Move transaction's pending pages to the available freelist.
  139. // Don't remove from the cache since the page is still free.
  140. m = append(m, txp.ids...)
  141. delete(f.pending, tid)
  142. }
  143. }
  144. sort.Sort(m)
  145. f.ids = pgids(f.ids).merge(m)
  146. }
  147. // releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
  148. func (f *freelist) releaseRange(begin, end txid) {
  149. if begin > end {
  150. return
  151. }
  152. var m pgids
  153. for tid, txp := range f.pending {
  154. if tid < begin || tid > end {
  155. continue
  156. }
  157. // Don't recompute freed pages if ranges haven't updated.
  158. if txp.lastReleaseBegin == begin {
  159. continue
  160. }
  161. for i := 0; i < len(txp.ids); i++ {
  162. if atx := txp.alloctx[i]; atx < begin || atx > end {
  163. continue
  164. }
  165. m = append(m, txp.ids[i])
  166. txp.ids[i] = txp.ids[len(txp.ids)-1]
  167. txp.ids = txp.ids[:len(txp.ids)-1]
  168. txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
  169. txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
  170. i--
  171. }
  172. txp.lastReleaseBegin = begin
  173. if len(txp.ids) == 0 {
  174. delete(f.pending, tid)
  175. }
  176. }
  177. sort.Sort(m)
  178. f.ids = pgids(f.ids).merge(m)
  179. }
  180. // rollback removes the pages from a given pending tx.
  181. func (f *freelist) rollback(txid txid) {
  182. // Remove page ids from cache.
  183. txp := f.pending[txid]
  184. if txp == nil {
  185. return
  186. }
  187. var m pgids
  188. for i, pgid := range txp.ids {
  189. delete(f.cache, pgid)
  190. tx := txp.alloctx[i]
  191. if tx == 0 {
  192. continue
  193. }
  194. if tx != txid {
  195. // Pending free aborted; restore page back to alloc list.
  196. f.allocs[pgid] = tx
  197. } else {
  198. // Freed page was allocated by this txn; OK to throw away.
  199. m = append(m, pgid)
  200. }
  201. }
  202. // Remove pages from pending list and mark as free if allocated by txid.
  203. delete(f.pending, txid)
  204. sort.Sort(m)
  205. f.ids = pgids(f.ids).merge(m)
  206. }
  207. // freed returns whether a given page is in the free list.
  208. func (f *freelist) freed(pgid pgid) bool {
  209. return f.cache[pgid]
  210. }
  211. // read initializes the freelist from a freelist page.
  212. func (f *freelist) read(p *page) {
  213. if (p.flags & freelistPageFlag) == 0 {
  214. panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
  215. }
  216. // If the page.count is at the max uint16 value (64k) then it's considered
  217. // an overflow and the size of the freelist is stored as the first element.
  218. idx, count := 0, int(p.count)
  219. if count == 0xFFFF {
  220. idx = 1
  221. count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
  222. }
  223. // Copy the list of page ids from the freelist.
  224. if count == 0 {
  225. f.ids = nil
  226. } else {
  227. ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
  228. f.ids = make([]pgid, len(ids))
  229. copy(f.ids, ids)
  230. // Make sure they're sorted.
  231. sort.Sort(pgids(f.ids))
  232. }
  233. // Rebuild the page cache.
  234. f.reindex()
  235. }
  236. // read initializes the freelist from a given list of ids.
  237. func (f *freelist) readIDs(ids []pgid) {
  238. f.ids = ids
  239. f.reindex()
  240. }
  241. // write writes the page ids onto a freelist page. All free and pending ids are
  242. // saved to disk since in the event of a program crash, all pending ids will
  243. // become free.
  244. func (f *freelist) write(p *page) error {
  245. // Combine the old free pgids and pgids waiting on an open transaction.
  246. // Update the header flag.
  247. p.flags |= freelistPageFlag
  248. // The page.count can only hold up to 64k elements so if we overflow that
  249. // number then we handle it by putting the size in the first element.
  250. lenids := f.count()
  251. if lenids == 0 {
  252. p.count = uint16(lenids)
  253. } else if lenids < 0xFFFF {
  254. p.count = uint16(lenids)
  255. f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
  256. } else {
  257. p.count = 0xFFFF
  258. ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
  259. f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
  260. }
  261. return nil
  262. }
  263. // reload reads the freelist from a page and filters out pending items.
  264. func (f *freelist) reload(p *page) {
  265. f.read(p)
  266. // Build a cache of only pending pages.
  267. pcache := make(map[pgid]bool)
  268. for _, txp := range f.pending {
  269. for _, pendingID := range txp.ids {
  270. pcache[pendingID] = true
  271. }
  272. }
  273. // Check each page in the freelist and build a new available freelist
  274. // with any pages not in the pending lists.
  275. var a []pgid
  276. for _, id := range f.ids {
  277. if !pcache[id] {
  278. a = append(a, id)
  279. }
  280. }
  281. f.ids = a
  282. // Once the available list is rebuilt then rebuild the free cache so that
  283. // it includes the available and pending free pages.
  284. f.reindex()
  285. }
  286. // reindex rebuilds the free cache based on available and pending free lists.
  287. func (f *freelist) reindex() {
  288. f.cache = make(map[pgid]bool, len(f.ids))
  289. for _, id := range f.ids {
  290. f.cache[id] = true
  291. }
  292. for _, txp := range f.pending {
  293. for _, pendingID := range txp.ids {
  294. f.cache[pendingID] = true
  295. }
  296. }
  297. }