Assuming that finding the exact sub-pool usage maximum for any given
distribution is NP complete (not proven).
-Taking into account unbalance ratios (tested programmatically by
+Taking into account sub-class size unbalance (tested programmatically by
randomly taking N entries from 256, calculating the distribution for
each bit (number of nodes for which bit is one/zero), and calculating
the difference in number of nodes for each bit, choosing the minimum
difference -- for millions of runs).
-tot entries unbalance largest linear array (stat. approx.)
+We start from the "ideal" population size (with no unbalance), and do a
+fixed point to find the appropriate population size.
+
+tot entries subclass extra items largest linear array (stat. approx.)
---------------------------------------------------------------------
-48 entries: 2 (98%) 24+1=25 (target ~50/2=25)
-54 entries: 2 (97%) 27+1=28 (target ~56/2=28)
+48 entries: 1 (98%) 24+1=25 (target ~50/2=25)
+54 entries: 1 (97%) 27+1=28 (target ~56/2=28)
Note: there exists rare worse cases where the unbalance is larger, but
it happens _very_ rarely. But need to provide a fallback if the subclass
For pool of size 4, we need to approximate what is the maximum unbalance
we can get for choice of distributions grouped by pairs of bits.
-tot entries unbalance largest linear array (stat. approx.)
+tot entries subclass extra items largest linear array (stat. approx.)
---------------------------------------------------------------------
-92 entries: 8 (99%) 23+2=25 (target: ~100/4=25)
-104 entries: 8 (99%) 26+2=28 (target: ~112/4=28)
+92 entries: 2 (99%) 23+2=25 (target: ~100/4=25)
+104 entries: 2 (99%) 26+2=28 (target: ~112/4=28)
Note: there exists rare worse cases where the unbalance is larger, but
*/
/*
- * This program generates random populations, and shows the worse-case
- * unbalance, as well as the distribution of unbalance encountered.
- * Remember that the unbalance is the delta between the lowest and
- * largest population. Therefore, to get the delta between the subclass
- * size and the actual number of items, we need to divide the unbalance
- * by the number of subclasses (by hand).
+ * This program generates random populations, and shows the largest
+ * sub-class generated, as well as the distribution of sub-class size
+ * for the largest sub-class of each population.
*/
#include <stdio.h>
static uint8_t nr_one[8];
static uint8_t nr_2d_11[8][8];
static uint8_t nr_2d_10[8][8];
-static int global_max_minunbalance = 0;
+static int global_max_minsubclass_len = 0;
-static unsigned int unbalance_distrib[256];
+static unsigned int subclass_len_distrib[256];
static
uint8_t random_char(void)
static
void stat_count(void)
{
- int minunbalance = INT_MAX;
+ int minsubclass_len = INT_MAX;
if (nr_distrib == 2) {
int i;
diff = (int) nr_one[i] * 2 - sel_pool_len;
if (diff < 0)
diff = -diff;
- if (diff < minunbalance) {
- minunbalance = diff;
+ if ((diff >> 1) < minsubclass_len) {
+ minsubclass_len = diff >> 1;
}
}
}
/* Get max linear array size */
if (diff[1] > diff[0])
diff[0] = diff[1];
- if (diff[0] < minunbalance) {
- minunbalance = diff[0];
+ if ((diff[0] >> 2) < minsubclass_len) {
+ minsubclass_len = diff[0] >> 2;
}
}
}
}
- if (minunbalance > global_max_minunbalance) {
- global_max_minunbalance = minunbalance;
+ if (minsubclass_len > global_max_minsubclass_len) {
+ global_max_minsubclass_len = minsubclass_len;
}
- unbalance_distrib[minunbalance]++;
+ subclass_len_distrib[minsubclass_len]++;
}
static
unsigned long long tot = 0;
for (i = 0; i < 256; i++) {
- tot += unbalance_distrib[i];
+ tot += subclass_len_distrib[i];
}
if (tot == 0)
return;
printf("Distribution:\n");
for (i = 0; i < 256; i++) {
printf("(%u, %u, %llu%%) ",
- i, unbalance_distrib[i],
- 100 * (unsigned long long) unbalance_distrib[i] / tot);
+ i, subclass_len_distrib[i],
+ 100 * (unsigned long long) subclass_len_distrib[i] / tot);
}
printf("\n");
}
static
void print_stat(uint64_t i)
{
- printf("after %llu pools, global_max_minunbalance: %d\n",
- (unsigned long long) i, global_max_minunbalance);
+ printf("after %llu pools, global_max_minsubclass_len extra: %d\n",
+ (unsigned long long) i, global_max_minsubclass_len);
print_distrib();
}