made the memory allocator get memory from dlmalloc...

...rather than using mmap(). Also made it free memory properly from
threads other than the main one.
David White 2008-12-05 03:33:26 +00:00
parent 0980cdfb71
commit 39cb360d9a
3 changed files with 119 additions and 41 deletions

View file

@@ -66,8 +66,8 @@ namespace image {
std::list<int> dummy_list;
template<typename T>
void cache_type<T>::flush()
template<typename T, typename SizeFunctor>
void cache_type<T, SizeFunctor>::flush()
{
typename std::vector<cache_item<T> >::iterator beg = content_.begin();
typename std::vector<cache_item<T> >::iterator end = content_.end();
@@ -778,6 +778,10 @@ surface get_image(const image::locator& i_locator, TYPE type)
// Optimizes surface before storing it
res = create_optimized_surface(res);
i_locator.add_to_cache(*imap, res);
std::cerr << "IMAGES: " << (images_.size()/1024) << " HEXED: " << (images_.size()/1024) << " SCALED_TO_HEX: " << (scaled_to_hex_images_.size()/1024) << " SCALED_TO_ZOOM: " << (scaled_to_zoom_.size()/1024) << " UNMASKED: " << (unmasked_images_.size()/1024) << " BRIGHTENED: " << (brightened_images_.size()/1024) << " SEMI: " << (semi_brightened_images_.size()/1024)
<< " TOTAL: " << (images_.size() + hexed_images_.size() + scaled_to_hex_images_.size() + scaled_to_zoom_.size() + unmasked_images_.size() + brightened_images_.size() + semi_brightened_images_.size())/(1024*1024) << "\n";
return res;
}
@@ -863,8 +867,8 @@ void precache_file_existence(const std::string& subdir)
}
template<typename T>
cache_item<T>& cache_type<T>::get_element(int index){
template<typename T, typename SizeFunctor>
cache_item<T>& cache_type<T, SizeFunctor>::get_element(int index){
assert (index != -1);
while(static_cast<size_t>(index) >= content_.size()) {
content_.push_back(cache_item<T>());
@@ -878,20 +882,22 @@ cache_item<T>& cache_type<T>::get_element(int index){
}
return elt;
}
template<typename T>
void cache_type<T>::on_load(int index){
template<typename T, typename SizeFunctor>
void cache_type<T, SizeFunctor>::on_load(int index){
if(index == -1) return ;
cache_item<T>& elt = content_[index];
if(!elt.loaded) return ;
lru_list_.push_front(index);
cache_size_++;
std::cerr << "cache size: " << size_functor_(elt.item) << "\n";
std::cerr << "cache size max: " << cache_size_ << "/" << cache_max_size_ << "\n";
cache_size_ += size_functor_(elt.item);
elt.position = lru_list_.begin();
while(cache_size_ > cache_max_size_-100) {
cache_item<T>& elt = content_[lru_list_.back()];
cache_size_ -= size_functor_(elt.item);
elt.loaded=false;
elt.item = T();
lru_list_.pop_back();
cache_size_--;
}
}
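The reworked on_load() above is a size-bounded LRU: loading an entry pushes its index to the front of lru_list_, adds size_functor_(elt.item) to cache_size_, records the list position, and then evicts from the back of the list until the total drops back under cache_max_size_. For readers who want the idea in isolation, here is a minimal self-contained sketch; the names lru_size_cache and Entry are invented for illustration, and the committed cache_type instead keeps its items in an index-addressed vector with a loaded flag.

    #include <cstddef>
    #include <list>
    #include <map>

    // Illustrative sketch only, not the committed code.
    template <typename T, typename Size>
    class lru_size_cache {
    public:
        explicit lru_size_cache(std::size_t max_cost) : total_(0), max_(max_cost) {}

        // Insert (or replace) a value, then evict least-recently-used entries
        // until the summed cost reported by the Size functor fits under the cap.
        void put(int key, const T& value) {
            erase(key);                               // drop any stale copy first
            lru_.push_front(key);
            Entry e = { value, lru_.begin(), size_(value) };
            entries_.insert(std::make_pair(key, e));
            total_ += e.cost;
            while (total_ > max_ && !lru_.empty())
                erase(lru_.back());
        }

        // Look up a value and mark it as most recently used.
        const T* get(int key) {
            typename std::map<int, Entry>::iterator it = entries_.find(key);
            if (it == entries_.end()) return 0;
            lru_.splice(lru_.begin(), lru_, it->second.pos);
            return &it->second.value;
        }

        std::size_t size() const { return total_; }   // total estimated cost

    private:
        struct Entry { T value; std::list<int>::iterator pos; std::size_t cost; };

        void erase(int key) {
            typename std::map<int, Entry>::iterator it = entries_.find(key);
            if (it == entries_.end()) return;
            total_ -= it->second.cost;
            lru_.erase(it->second.pos);
            entries_.erase(it);
        }

        std::size_t total_, max_;
        std::list<int> lru_;
        std::map<int, Entry> entries_;
        Size size_;
    };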

View file

@@ -72,13 +72,22 @@ namespace image {
std::list<int>::iterator position;
};
template<typename T>
struct SizeSurface {
int operator()(const surface& s) const { return 256 + (s ? s->w*s->h*4 : 0); }
};
struct SizeOne {
template<typename T>
int operator()(const T& t) const { return 1; }
};
template<typename T, typename SizeFunctor>
class cache_type
{
public:
cache_type() :
cache_size_(0),
cache_max_size_(2000),
cache_max_size_(10000000),
lru_list_(),
content_()
{
@@ -87,11 +96,13 @@ namespace image {
cache_item<T>& get_element(int index);
void on_load(int index);
void flush();
size_t size() const { return cache_size_; }
private:
int cache_size_;
int cache_max_size_;
size_t cache_size_;
size_t cache_max_size_;
std::list<int> lru_list_;
std::vector<cache_item<T> > content_;
SizeFunctor size_functor_;
};
//a generic image locator. Abstracts the location of an image.
@@ -156,18 +167,18 @@ namespace image {
// loads the image it is pointing to from the disk
surface load_from_disk() const;
bool in_cache(cache_type<surface>& cache) const
bool in_cache(cache_type<surface,SizeSurface>& cache) const
{ return index_ == -1 ? false : cache.get_element(index_).loaded; }
surface locate_in_cache(cache_type<surface>& cache) const
surface locate_in_cache(cache_type<surface,SizeSurface>& cache) const
{ return index_ == -1 ? surface() : cache.get_element(index_).item; }
void add_to_cache(cache_type<surface>& cache, const surface &image) const
void add_to_cache(cache_type<surface,SizeSurface>& cache, const surface &image) const
{ if(index_ != -1 ) cache.get_element(index_) = cache_item<surface>(image); cache.on_load(index_); }
bool in_cache(cache_type<locator>& cache) const
bool in_cache(cache_type<locator,SizeOne>& cache) const
{ return index_ == -1 ? false : cache.get_element(index_).loaded; cache.on_load(index_); }
locator locate_in_cache(cache_type<locator>& cache) const
locator locate_in_cache(cache_type<locator,SizeOne>& cache) const
{ return index_ == -1 ? locator() : cache.get_element(index_).item; }
void add_to_cache(cache_type<locator>& cache, const locator &image) const
void add_to_cache(cache_type<locator,SizeOne>& cache, const locator &image) const
{ if(index_ != -1) cache.get_element(index_) = cache_item<locator>(image); }
protected:
static int last_index_;
@@ -181,8 +192,8 @@ namespace image {
};
typedef cache_type<surface> image_cache;
typedef cache_type<locator> locator_cache;
typedef cache_type<surface,SizeSurface> image_cache;
typedef cache_type<locator,SizeOne> locator_cache;
typedef std::map<t_translation::t_terrain, surface> mini_terrain_cache_map;
extern mini_terrain_cache_map mini_terrain_cache;
extern mini_terrain_cache_map mini_fogged_terrain_cache;
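For scale: SizeSurface above charges a flat 256 bytes of bookkeeping plus four bytes per pixel (32-bit RGBA), and with it plugged into image_cache, size() now reports an estimated byte total rather than an entry count. That is what the new debug output in get_image() (first file above) is printing when it divides each cache by 1024 and the grand total by 1024*1024. A back-of-the-envelope illustration, with the tile dimensions chosen purely for the example:

    // Illustrative numbers only.
    const int w = 72, h = 72;                  // e.g. one hex-sized tile
    const int estimate = 256 + w * h * 4;      // = 20992 bytes, about 20.5 KiB
    // With cache_max_size_ = 10000000, roughly 470-480 such surfaces fit
    // before on_load() starts evicting from the back of the LRU list.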

View file

@@ -34,8 +34,6 @@ and then its data section. A block should be a multiple of the page size.
A given block is dedicated to allocating chunks of a specific size. All blocks
are the same size (4096 bytes by default, which should be the minimum).
- superblock: we allocate one huge block from which all blocks are allocated.
When the program
*/
#include <assert.h>
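The header comment above describes the allocator's layout: one superblock is carved into BLOCK_SIZE-byte blocks, and each block serves chunks of a single size behind a small header. Roughly, and purely as an illustration inferred from that comment:

    /* Illustrative layout only, inferred from the comment above.
     *
     *  superblock (CUSTOM_MEMORY_SIZE bytes, obtained once at startup)
     *  +----------+----------+----------+--- ... ---+
     *  | block 0  | block 1  | block 2  |           |   each block is BLOCK_SIZE bytes
     *  +----------+----------+----------+--- ... ---+
     *
     *  one block: a small header, then same-size chunks
     *  +--------+-------+-------+-------+--- ... ---+
     *  | header | chunk | chunk | chunk |           |   all chunks in a block are one size
     *  +--------+-------+-------+-------+--- ... ---+
     */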
@@ -63,7 +61,7 @@ void dlfree(void* ptr);
#define GET_POOL_INDEX(n) ((n)/CHUNK_SIZE_STEP)
#define ROUNDUP_SIZE(n) (((n)%CHUNK_SIZE_STEP) ? ((n) + CHUNK_SIZE_STEP - ((n)%CHUNK_SIZE_STEP)) : (n))
#define CUSTOM_MEMORY_SIZE (1024*1024*20)
#define CUSTOM_MEMORY_SIZE (1024*1024*40)
uint8_t* begin_superblock_range = NULL;
uint8_t* begin_superblock = NULL;
uint8_t* end_superblock = NULL;
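For orientation, the two pool macros shown as context above map a request size onto a fixed-size pool: ROUNDUP_SIZE rounds the size up to the next multiple of CHUNK_SIZE_STEP, and GET_POOL_INDEX selects the pool that serves that rounded size. A worked example, assuming CHUNK_SIZE_STEP is 8 (its real definition is earlier in this file and is not visible in this hunk):

    /* Illustration only; CHUNK_SIZE_STEP == 8 is an assumption here. */
    assert(ROUNDUP_SIZE(20) == 24);      /* 20 % 8 != 0, so 20 + 8 - 4 = 24      */
    assert(ROUNDUP_SIZE(24) == 24);      /* already a multiple of the step        */
    assert(GET_POOL_INDEX(24) == 3);     /* requests of 17-24 bytes share pool 3  */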
@@ -74,14 +72,18 @@ pthread_t main_thread;
void init_custom_malloc()
{
main_thread = pthread_self();
void* alloc = mmap(NULL, CUSTOM_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
// allocate the memory -- allocate an extra block at the end, so that
// if the address we get back isn't block-aligned, we can advance
// the pointer until it is.
void* alloc = dlmalloc(CUSTOM_MEMORY_SIZE + BLOCK_SIZE);
assert(alloc);
begin_superblock_range = begin_superblock = (uint8_t*)alloc;
end_superblock = begin_superblock + CUSTOM_MEMORY_SIZE;
while(((intptr_t)begin_superblock)%BLOCK_SIZE) {
begin_superblock = (uint8_t*)alloc;
while(((uintptr_t)begin_superblock)%BLOCK_SIZE) {
++begin_superblock;
}
end_superblock = begin_superblock + CUSTOM_MEMORY_SIZE;
begin_superblock_range = begin_superblock;
}
@@ -219,6 +221,57 @@ void make_block_orphan(Block* block)
header->next = NULL;
}
// A list of the chunks that were allocated in the main thread, but free()
// was called in another thread. We can't deallocate them from another thread,
// so we put them in this array. The main thread will free all these chunks,
// whenever it can't immediately allocate memory.
void** free_chunks;
size_t nfree_chunks, capacity_free_chunks;
pthread_mutex_t free_chunks_mutex = PTHREAD_MUTEX_INITIALIZER;
//mutex to protect all calls to dlmalloc.
pthread_mutex_t dlmalloc_mutex = PTHREAD_MUTEX_INITIALIZER;
void free_memory(void* ptr);
void collect_memory_from_other_threads()
{
pthread_mutex_lock(&free_chunks_mutex);
int n;
for(n = 0; n != nfree_chunks; ++n) {
free_memory(free_chunks[n]);
}
nfree_chunks = 0;
pthread_mutex_unlock(&free_chunks_mutex);
}
void free_memory_from_other_thread(void* ptr)
{
pthread_mutex_lock(&free_chunks_mutex);
if(nfree_chunks == capacity_free_chunks) {
capacity_free_chunks *= 2;
if(capacity_free_chunks < 16) {
capacity_free_chunks = 16;
}
pthread_mutex_lock(&dlmalloc_mutex);
void** new_free_chunks = (void**)dlrealloc(free_chunks, sizeof(void*)*capacity_free_chunks);
pthread_mutex_unlock(&dlmalloc_mutex);
if(!new_free_chunks) {
pthread_mutex_unlock(&free_chunks_mutex);
fprintf(stderr, "DLREALLOC FAILED!\n");
return;
}
free_chunks = new_free_chunks;
}
free_chunks[nfree_chunks++] = ptr;
pthread_mutex_unlock(&free_chunks_mutex);
}
Block* get_block(uint32_t chunk_size)
{
const int index = GET_POOL_INDEX(chunk_size);
@@ -227,6 +280,14 @@ Block* get_block(uint32_t chunk_size)
return block_pools[index];
}
// free memory from other threads and then try again. This requires a mutex
// lock, but this code should be rarely reached.
collect_memory_from_other_threads();
if(block_pools[index]) {
return block_pools[index];
}
Block* block = allocate_new_block(chunk_size);
if(block == NULL) {
return block;
@@ -263,8 +324,6 @@ void free_memory(void* ptr)
}
}
pthread_mutex_t malloc_mutex = PTHREAD_MUTEX_INITIALIZER;
void* malloc(size_t size)
{
if(pthread_self() == main_thread && size > 0 && size <= MAX_CHUNK_SIZE) {
@@ -275,25 +334,25 @@ void* malloc(size_t size)
}
}
pthread_mutex_lock(&malloc_mutex);
pthread_mutex_lock(&dlmalloc_mutex);
void* result = dlmalloc(size);
pthread_mutex_unlock(&malloc_mutex);
pthread_mutex_unlock(&dlmalloc_mutex);
return result;
}
void* calloc(size_t count, size_t size)
{
pthread_mutex_lock(&malloc_mutex);
pthread_mutex_lock(&dlmalloc_mutex);
void* result = dlcalloc(count, size);
pthread_mutex_unlock(&malloc_mutex);
pthread_mutex_unlock(&dlmalloc_mutex);
return result;
}
void* valloc(size_t size)
{
pthread_mutex_lock(&malloc_mutex);
pthread_mutex_lock(&dlmalloc_mutex);
void* result = dlvalloc(size);
pthread_mutex_unlock(&malloc_mutex);
pthread_mutex_unlock(&dlmalloc_mutex);
return result;
}
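The old malloc_mutex is renamed to dlmalloc_mutex to match what it now guards: every entry into dlmalloc's non-thread-safe routines, including the dlrealloc issued from free_memory_from_other_thread() earlier in this file, must serialize on the same lock. A hypothetical helper (not in the commit) that captures the repeated lock/call/unlock pattern in one place:

    /* Hypothetical helper, not part of the commit: serialize one dlmalloc call. */
    static void* locked_dlmalloc(size_t size)
    {
        pthread_mutex_lock(&dlmalloc_mutex);
        void* result = dlmalloc(size);
        pthread_mutex_unlock(&dlmalloc_mutex);
        return result;
    }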
@@ -313,9 +372,9 @@ void* realloc(void* ptr, size_t size)
return new_memory;
}
pthread_mutex_lock(&malloc_mutex);
pthread_mutex_lock(&dlmalloc_mutex);
void* result = dlrealloc(ptr, size);
pthread_mutex_unlock(&malloc_mutex);
pthread_mutex_unlock(&dlmalloc_mutex);
return result;
}
@@ -323,16 +382,18 @@ void free(void* ptr)
{
if(IS_OUR_PTR(ptr)) {
if(pthread_self() != main_thread) {
fprintf(stderr, "ERROR, wrong thread!\n");
//this will queue up the free to be performed later in the
//main thread when it wants more memory.
free_memory_from_other_thread(ptr);
return;
}
free_memory(ptr);
return;
}
pthread_mutex_lock(&malloc_mutex);
pthread_mutex_lock(&dlmalloc_mutex);
dlfree(ptr);
pthread_mutex_unlock(&malloc_mutex);
pthread_mutex_unlock(&dlmalloc_mutex);
}
#ifdef TEST_POOLED_ALLOC
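Taken together, the new free() path means a pointer handed out by the pooled allocator may be free()d from any thread: off the main thread it is only queued, and it is actually recycled the next time a main-thread allocation finds its pools empty and runs collect_memory_from_other_threads(). A sketch of that flow (assumes this allocator is linked in as the process malloc/free, init_custom_malloc() has run on the main thread, and 64 bytes is within MAX_CHUNK_SIZE; not part of the commit):

    #include <pthread.h>
    #include <stdlib.h>

    static void* worker(void* p)
    {
        free(p);           /* not the main thread: the chunk is only queued */
        return NULL;
    }

    int main(void)
    {
        void* p = malloc(64);              /* served from the main thread's pools */
        pthread_t t;
        pthread_create(&t, NULL, worker, p);
        pthread_join(t, NULL);
        /* The queued chunk is released later, when a main-thread allocation
         * finds its pool empty and calls collect_memory_from_other_threads(). */
        return 0;
    }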