[ruby-dev:49963] [Ruby trunk Bug#13136] large_array.sample(11)が遅い
From:
nobu@...
Date:
2017-01-20 00:59:54 UTC
List:
ruby-dev #49963
Issue #13136 has been updated by Nobuyoshi Nakada.
=E5=88=A5=E6=A1=88=E3=81=A8=E3=81=97=E3=81=A6hash=E3=81=AE=E4=BB=A3=E3=82=
=8F=E3=82=8A=E3=81=AB=E9=85=8D=E5=88=97=E3=81=A7=E8=A6=9A=E3=81=88=E3=81=A6=
=E3=81=8A=E3=81=8F=E3=81=A8=E3=81=84=E3=81=86=E6=96=B9=E6=B3=95=E3=82=82=E3=
=81=82=E3=82=8A=E3=81=BE=E3=81=99=E3=81=8C=E3=80=81=E5=85=83=E3=81=AE=E9=85=
=8D=E5=88=97=E3=81=AB=E6=AF=94=E4=BE=8B=E3=81=97=E3=81=9F=E4=BD=9C=E6=A5=AD=
=E9=A0=98=E5=9F=9F=E3=82=92=E4=BD=BF=E3=81=86=E3=81=AE=E3=81=A7=E3=81=9D=E3=
=81=AE=E3=82=B3=E3=82=B9=E3=83=88=E3=81=8C=E9=AB=98=E3=81=84=E3=81=8B=E3=82=
=82=E3=81=97=E3=82=8C=E3=81=BE=E3=81=9B=E3=82=93=E3=80=82
```diff
diff --git a/array.c b/array.c
index a19e7bb710..4294de640a 100644
--- a/array.c
+++ b/array.c
@@ -131,6 +131,11 @@ static ID id_cmp, id_div, id_power;
=20
#define ARY_SET(a, i, v) RARRAY_ASET((assert(!ARY_SHARED_P(a)), (a)), (i),=
(v))
=20
+#define tmpbuf(n, size) rb_str_tmp_new((n)*(size))
+#define tmpbuf_discard(s) (rb_str_resize((s), 0L), RBASIC_SET_CLASS_RAW(s,=
rb_cString))
+#define tmpary(n) rb_ary_tmp_new(n)
+#define tmpary_discard(a) (ary_discard(a), RBASIC_SET_CLASS_RAW(a, rb_cArr=
ay))
+
void
rb_mem_clear(register VALUE *mem, register long size)
{
@@ -4797,6 +4802,7 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
VALUE opts, randgen =3D rb_cRandom;
long n, len, i, j, k, idx[10];
long rnds[numberof(idx)];
+ long memo_threshold;
=20
if (OPTHASH_GIVEN_P(opts)) {
VALUE rnd;
@@ -4856,6 +4862,11 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
}
return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), =
RARRAY_AREF(ary, k));
}
+ memo_threshold =3D
+ len < 2560 ? len / 128 :
+ len < 5120 ? len / 64 :
+ len < 10240 ? len / 32 :
+ len / 16;
if (n <=3D numberof(idx)) {
long sorted[numberof(idx)];
sorted[0] =3D idx[0] =3D rnds[0];
@@ -4875,6 +4886,24 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
}
});
}
+ else if (n <=3D memo_threshold) {
+ VALUE tbl =3D tmpbuf(len, sizeof(long));
+ long *table =3D (long*)RSTRING_PTR(tbl);
+ result =3D rb_ary_new_capa(n);
+ for (i=3D0; i<n; i++) {
+ table[i] =3D -1L;
+ }
+ for (i=3D0; i<n; i++) {
+ long v;
+ long j2 =3D j =3D RAND_UPTO(len-i) + i;
+ long i2 =3D i;
+ if ((v =3D table[i]) >=3D 0) i2 =3D v;
+ if ((v =3D table[j]) >=3D 0) j2 =3D v;
+ table[j] =3D i2;
+ RARRAY_ASET(result, i, RARRAY_AREF(ary, j2));
+ }
+ tmpbuf_discard(tbl);
+ }
else {
result =3D rb_ary_dup(ary);
RBASIC_CLEAR_CLASS(result);
@@ -4955,11 +4984,6 @@ rb_ary_cycle(int argc, VALUE *argv, VALUE ary)
return Qnil;
}
=20
-#define tmpbuf(n, size) rb_str_tmp_new((n)*(size))
-#define tmpbuf_discard(s) (rb_str_resize((s), 0L), RBASIC_SET_CLASS_RAW(s,=
rb_cString))
-#define tmpary(n) rb_ary_tmp_new(n)
-#define tmpary_discard(a) (ary_discard(a), RBASIC_SET_CLASS_RAW(a, rb_cArr=
ay))
-
/*
* Build a ruby array of the corresponding values and yield it to the
* associated block.
```
----------------------------------------
Bug #13136: large_array.sample(11)=E3=81=8C=E9=81=85=E3=81=84
https://bugs.ruby-lang.org/issues/13136#change-62591
* Author: tomoya ishida
* Status: Open
* Priority: Normal
* Assignee:=20
* Target version:=20
* ruby -v: ruby 2.4.0p0 (2016-12-24 revision 57164) [x86_64-darwin16]
* Backport: 2.2: UNKNOWN, 2.3: UNKNOWN, 2.4: UNKNOWN
----------------------------------------
Array#sample=E3=81=AE=E3=83=91=E3=83=95=E3=82=A9=E3=83=BC=E3=83=9E=E3=83=B3=
=E3=82=B9=E3=82=92=E6=94=B9=E5=96=84=E3=81=97=E3=81=9F=E3=81=84
```ruby
require 'benchmark'
arr =3D 100000.times.to_a;
Benchmark.measure{100000.times{arr.sample 10}}.real #=3D> 0.070521000074222=
68 =E9=80=9F=E3=81=84!
Benchmark.measure{100000.times{arr.sample 11}}.real #=3D> 37.8880900000222 =
=E9=81=85=E3=81=84!!!
```
=E7=8F=BE=E7=8A=B6=E3=81=AEArray#sample(n)
```
n<=3D3 =E2=86=92 =E5=80=8B=E5=88=A5=E5=A0=B4=E5=90=88=E5=88=86=E3=81=91
3<n<=3D10 =E2=86=92 2=E9=87=8D=E3=83=AB=E3=83=BC=E3=83=97 O(n^2)
# n < =E9=81=A9=E5=BD=93=E3=81=AA=E9=96=BE=E5=80=A4 =E2=86=92 O(n)=E3=81=AE=
=E3=82=A2=E3=83=AB=E3=82=B4=E3=83=AA=E3=82=BA=E3=83=A0=E3=82=92=E4=BD=BF=E3=
=81=84=E3=81=9F=E3=81=84
n>10 =E2=86=92 dup=E3=81=97=E3=81=A6shuffle O(size) =E5=B7=A8=E5=A4=A7=E9=
=85=8D=E5=88=97=E3=81=8B=E3=82=89=E5=B0=91=E6=95=B0sample=E3=81=99=E3=82=8B=
=E6=99=82=E3=81=AB=E9=81=85=E3=81=84
```
=E3=82=A2=E3=83=AB=E3=82=B4=E3=83=AA=E3=82=BA=E3=83=A0=E6=94=B9=E5=96=84=E6=
=A1=88=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6
```ruby
# n>10=E3=81=AE=E6=99=82=E3=81=AE=E7=8F=BE=E7=8A=B6=E3=81=AE=E3=82=A2=E3=83=
=AB=E3=82=B4=E3=83=AA=E3=82=BA=E3=83=A0 O(size)
def sample arr, n
arr =3D arr.dup
n.times do |i|
j =3D rand(arr.size - i) + i
arr[i], arr[j] =3D arr[j], arr[i]
end
arr.take n
end
# dup=E3=82=92=E3=82=84=E3=82=81=E3=80=81=E9=85=8D=E5=88=97=E3=81=B8=E3=81=
=AE=E4=BB=A3=E5=85=A5=E3=82=92=E6=B8=9B=E3=82=89=E3=81=99(=E3=81=9F=E3=81=
=A0=E3=81=97=E5=85=83=E9=85=8D=E5=88=97=E3=82=92=E7=A0=B4=E5=A3=8A=E3=81=97=
=E3=81=A6=E3=81=97=E3=81=BE=E3=81=86) O(n)
def sample arr, n
n.times.map do |i|
j =3D rand(arr.size - i) + i
val =3D arr[j]
arr[j] =3D arr[i]
val
end
end
# =E9=85=8D=E5=88=97=E3=82=92=E6=9B=B8=E3=81=8D=E6=8F=9B=E3=81=88=E3=82=8B=
=E4=BB=A3=E3=82=8F=E3=82=8A=E3=81=AB=E3=80=81=E3=81=A9=E3=81=93=E3=82=92=E5=
=85=A5=E3=82=8C=E6=9B=BF=E3=81=88=E3=81=9F=E3=81=8Bhash=E3=81=AB=E4=BF=9D=
=E5=AD=98=E3=81=99=E3=82=8B(=E5=85=83=E9=85=8D=E5=88=97=E3=82=92=E7=A0=B4=
=E5=A3=8A=E3=81=97=E3=81=AA=E3=81=84) O(n)
def sample arr, n
replace_table =3D {}
n.times.map do |i|
j =3D rand(arr.size - i) + i
j2 =3D replace_table[j] || j
replace_table[j] =3D replace_table[i] || i
arr[j2]
end
end
```
patch=E4=BD=9C=E3=82=8A=E3=81=BE=E3=81=97=E3=81=9F
=E3=83=99=E3=83=B3=E3=83=81=E3=83=9E=E3=83=BC=E3=82=AF
```
arr =3D 100000.times.to_a;
def measure;t=3DTime.now;yield;Time.now-t;end
p measure{10000.times{arr.sample 10}}
p measure{10000.times{arr.sample 11}}
p measure{10000.times{arr.sample 100}}
p measure{10000.times{arr.sample 1000}}
```
ruby 2.4.0p0
```
0.008185
3.938569
4.043285
4.142051
```
this patch
```
0.010408
0.016441
0.08861
0.684844
```
---Files--------------------------------
rb_ary_sample.patch (1.42 KB)
rb_ary_sample_patch2.patch (1.78 KB)
--=20
https://bugs.ruby-lang.org/