[ruby-dev:49963] [Ruby trunk Bug#13136] large_array.sample(11)が遅い

From: nobu@...
Date: 2017-01-20 00:59:54 UTC
List: ruby-dev #49963
Issue #13136 has been updated by Nobuyoshi Nakada.


=E5=88=A5=E6=A1=88=E3=81=A8=E3=81=97=E3=81=A6hash=E3=81=AE=E4=BB=A3=E3=82=
=8F=E3=82=8A=E3=81=AB=E9=85=8D=E5=88=97=E3=81=A7=E8=A6=9A=E3=81=88=E3=81=A6=
=E3=81=8A=E3=81=8F=E3=81=A8=E3=81=84=E3=81=86=E6=96=B9=E6=B3=95=E3=82=82=E3=
=81=82=E3=82=8A=E3=81=BE=E3=81=99=E3=81=8C=E3=80=81=E5=85=83=E3=81=AE=E9=85=
=8D=E5=88=97=E3=81=AB=E6=AF=94=E4=BE=8B=E3=81=97=E3=81=9F=E4=BD=9C=E6=A5=AD=
=E9=A0=98=E5=9F=9F=E3=82=92=E4=BD=BF=E3=81=86=E3=81=AE=E3=81=A7=E3=81=9D=E3=
=81=AE=E3=82=B3=E3=82=B9=E3=83=88=E3=81=8C=E9=AB=98=E3=81=84=E3=81=8B=E3=82=
=82=E3=81=97=E3=82=8C=E3=81=BE=E3=81=9B=E3=82=93=E3=80=82

```diff
diff --git a/array.c b/array.c
index a19e7bb710..4294de640a 100644
--- a/array.c
+++ b/array.c
@@ -131,6 +131,11 @@ static ID id_cmp, id_div, id_power;
=20
 #define ARY_SET(a, i, v) RARRAY_ASET((assert(!ARY_SHARED_P(a)), (a)), (i),=
 (v))
=20
+#define tmpbuf(n, size) rb_str_tmp_new((n)*(size))
+#define tmpbuf_discard(s) (rb_str_resize((s), 0L), RBASIC_SET_CLASS_RAW(s,=
 rb_cString))
+#define tmpary(n) rb_ary_tmp_new(n)
+#define tmpary_discard(a) (ary_discard(a), RBASIC_SET_CLASS_RAW(a, rb_cArr=
ay))
+
 void
 rb_mem_clear(register VALUE *mem, register long size)
 {
@@ -4797,6 +4802,7 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
     VALUE opts, randgen =3D rb_cRandom;
     long n, len, i, j, k, idx[10];
     long rnds[numberof(idx)];
+    long memo_threshold;
=20
     if (OPTHASH_GIVEN_P(opts)) {
 	VALUE rnd;
@@ -4856,6 +4862,11 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
 	}
 	return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), =
RARRAY_AREF(ary, k));
     }
+    memo_threshold =3D
+	len < 2560 ? len / 128 :
+	len < 5120 ? len / 64 :
+	len < 10240 ? len / 32 :
+	len / 16;
     if (n <=3D numberof(idx)) {
 	long sorted[numberof(idx)];
 	sorted[0] =3D idx[0] =3D rnds[0];
@@ -4875,6 +4886,24 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
 	    }
 	});
     }
+    else if (n <=3D memo_threshold) {
+	VALUE tbl =3D tmpbuf(len, sizeof(long));
+	long *table =3D (long*)RSTRING_PTR(tbl);
+	result =3D rb_ary_new_capa(n);
+	for (i=3D0; i<n; i++) {
+	    table[i] =3D -1L;
+	}
+	for (i=3D0; i<n; i++) {
+	    long v;
+	    long j2 =3D j =3D RAND_UPTO(len-i) + i;
+	    long i2 =3D i;
+	    if ((v =3D table[i]) >=3D 0) i2 =3D v;
+	    if ((v =3D table[j]) >=3D 0) j2 =3D v;
+	    table[j] =3D i2;
+	    RARRAY_ASET(result, i, RARRAY_AREF(ary, j2));
+	}
+	tmpbuf_discard(tbl);
+    }
     else {
 	result =3D rb_ary_dup(ary);
 	RBASIC_CLEAR_CLASS(result);
@@ -4955,11 +4984,6 @@ rb_ary_cycle(int argc, VALUE *argv, VALUE ary)
     return Qnil;
 }
=20
-#define tmpbuf(n, size) rb_str_tmp_new((n)*(size))
-#define tmpbuf_discard(s) (rb_str_resize((s), 0L), RBASIC_SET_CLASS_RAW(s,=
 rb_cString))
-#define tmpary(n) rb_ary_tmp_new(n)
-#define tmpary_discard(a) (ary_discard(a), RBASIC_SET_CLASS_RAW(a, rb_cArr=
ay))
-
 /*
  * Build a ruby array of the corresponding values and yield it to the
  * associated block.
```

----------------------------------------
Bug #13136: large_array.sample(11)=E3=81=8C=E9=81=85=E3=81=84
https://bugs.ruby-lang.org/issues/13136#change-62591

* Author: tomoya ishida
* Status: Open
* Priority: Normal
* Assignee:=20
* Target version:=20
* ruby -v: ruby 2.4.0p0 (2016-12-24 revision 57164) [x86_64-darwin16]
* Backport: 2.2: UNKNOWN, 2.3: UNKNOWN, 2.4: UNKNOWN
----------------------------------------
Array#sample=E3=81=AE=E3=83=91=E3=83=95=E3=82=A9=E3=83=BC=E3=83=9E=E3=83=B3=
=E3=82=B9=E3=82=92=E6=94=B9=E5=96=84=E3=81=97=E3=81=9F=E3=81=84

```ruby
require 'benchmark'
arr =3D 100000.times.to_a;
Benchmark.measure{100000.times{arr.sample 10}}.real #=3D> 0.070521000074222=
68 =E9=80=9F=E3=81=84!
Benchmark.measure{100000.times{arr.sample 11}}.real #=3D> 37.8880900000222 =
=E9=81=85=E3=81=84!!!
```

=E7=8F=BE=E7=8A=B6=E3=81=AEArray#sample(n)

```
n<=3D3 =E2=86=92 =E5=80=8B=E5=88=A5=E5=A0=B4=E5=90=88=E5=88=86=E3=81=91
3<n<=3D10 =E2=86=92 2=E9=87=8D=E3=83=AB=E3=83=BC=E3=83=97 O(n^2)
# n < =E9=81=A9=E5=BD=93=E3=81=AA=E9=96=BE=E5=80=A4 =E2=86=92 O(n)=E3=81=AE=
=E3=82=A2=E3=83=AB=E3=82=B4=E3=83=AA=E3=82=BA=E3=83=A0=E3=82=92=E4=BD=BF=E3=
=81=84=E3=81=9F=E3=81=84
n>10 =E2=86=92 dup=E3=81=97=E3=81=A6shuffle O(size) =E5=B7=A8=E5=A4=A7=E9=
=85=8D=E5=88=97=E3=81=8B=E3=82=89=E5=B0=91=E6=95=B0sample=E3=81=99=E3=82=8B=
=E6=99=82=E3=81=AB=E9=81=85=E3=81=84
```

=E3=82=A2=E3=83=AB=E3=82=B4=E3=83=AA=E3=82=BA=E3=83=A0=E6=94=B9=E5=96=84=E6=
=A1=88=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6

```ruby
# n>10=E3=81=AE=E6=99=82=E3=81=AE=E7=8F=BE=E7=8A=B6=E3=81=AE=E3=82=A2=E3=83=
=AB=E3=82=B4=E3=83=AA=E3=82=BA=E3=83=A0 O(size)
def sample arr, n
  arr =3D arr.dup
  n.times do |i|
    j =3D rand(arr.size - i) + i
    arr[i], arr[j] =3D arr[j], arr[i]
  end
  arr.take n
end

# dup=E3=82=92=E3=82=84=E3=82=81=E3=80=81=E9=85=8D=E5=88=97=E3=81=B8=E3=81=
=AE=E4=BB=A3=E5=85=A5=E3=82=92=E6=B8=9B=E3=82=89=E3=81=99(=E3=81=9F=E3=81=
=A0=E3=81=97=E5=85=83=E9=85=8D=E5=88=97=E3=82=92=E7=A0=B4=E5=A3=8A=E3=81=97=
=E3=81=A6=E3=81=97=E3=81=BE=E3=81=86) O(n)
def sample arr, n
  n.times.map do |i|
    j =3D rand(arr.size - i) + i
    val =3D arr[j]
    arr[j] =3D arr[i]
    val
  end
end

# =E9=85=8D=E5=88=97=E3=82=92=E6=9B=B8=E3=81=8D=E6=8F=9B=E3=81=88=E3=82=8B=
=E4=BB=A3=E3=82=8F=E3=82=8A=E3=81=AB=E3=80=81=E3=81=A9=E3=81=93=E3=82=92=E5=
=85=A5=E3=82=8C=E6=9B=BF=E3=81=88=E3=81=9F=E3=81=8Bhash=E3=81=AB=E4=BF=9D=
=E5=AD=98=E3=81=99=E3=82=8B(=E5=85=83=E9=85=8D=E5=88=97=E3=82=92=E7=A0=B4=
=E5=A3=8A=E3=81=97=E3=81=AA=E3=81=84) O(n)
def sample arr, n
  replace_table =3D {}
  n.times.map do |i|
    j =3D rand(arr.size - i) + i
    j2 =3D replace_table[j] || j
    replace_table[j] =3D replace_table[i] || i
    arr[j2]
  end
end
```

patch=E4=BD=9C=E3=82=8A=E3=81=BE=E3=81=97=E3=81=9F

=E3=83=99=E3=83=B3=E3=83=81=E3=83=9E=E3=83=BC=E3=82=AF

```
arr =3D 100000.times.to_a;
def measure;t=3DTime.now;yield;Time.now-t;end
p measure{10000.times{arr.sample 10}}
p measure{10000.times{arr.sample 11}}
p measure{10000.times{arr.sample 100}}
p measure{10000.times{arr.sample 1000}}
```

ruby 2.4.0p0

```
0.008185
3.938569
4.043285
4.142051
```

this patch

```
0.010408
0.016441
0.08861
0.684844
```


---Files--------------------------------
rb_ary_sample.patch (1.42 KB)
rb_ary_sample_patch2.patch (1.78 KB)


--=20
https://bugs.ruby-lang.org/

In This Thread

Prev Next