index.txt 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. # Speed up that library when you can't C a thing
  2. John Melesky
  3. (Open Source Bridge, June 2009)
  4. ---
  5. # Caveats
  6. - We're talking about Unix and Linux
  7. - Most should be applicable on OSX once you factor in the dynlib stuff
  8. - I have no idea on the Windows front; sorry
  9. ---
  10. # Caveats
  11. - We're talking about Unix and Linux
  12. - Most should be applicable on OSX once you factor in the dynlib stuff
  13. - I have no idea on the Windows front; sorry
  14. - The example is in Ruby and Ocaml
  15. - The FFIs for Perl and Python are similar
  16. - Other called languages are possible
  17. - I'll try to be explicit what's general and what's example-specific
  18. ---
  19. # Ever in this situation?
  20. ---
  21. # The culprit
  22. def matrix_mul(a,b)
  23. rows = (0 .. a.size - 1).collect {
  24. |i|
  25. (0 .. b[0].size - 1).collect {
  26. |j|
  27. vij = 0
  28. 0.upto(a[i].size - 1) do
  29. |k|
  30. vij += a[i][k] * b[k][j]
  31. end
  32. vij
  33. }
  34. }
  35. return rows
  36. end
  37. (adapted from matrix.rb)
  38. ---
  39. # The solution
  40. <table>
  41. <tr>
  42. <td><b><font size="+2">That's what C is for, right?</b></td>
  43. <td><img src="img/dhh.jpg"></td>
  44. </tr>
  45. </table>
  46. ---
  47. # That's not what C is for!
  48. C is a systems language, intended for use with 30-year-old kernels and drivers, on processors with fewer than 10k transistors (we're currently in the billion-transistor range, thankyouverymuch Moore's Law).
  49. ---
  50. # That's not what C is for!
  51. C is a systems language, intended for use with 30-year-old kernels and drivers, on processors with fewer than 10k transistors (we're currently in the billion-transistor range, thankyouverymuch Moore's Law).
  52. In other words, it's fast only as a side effect.
  53. ---
  54. # Other languages are better
  55. For raw numerics, Fortran still has C beat. APL is pretty dang fast, too.
  56. For complex datatypes, languages like Haskell or ML will all give you better type support, safer usage, and possibly faster code.
  57. Really, almost any language offers benefits over C, whether it's faster, safer, more concise, more advanced, or all of the above.
  58. ---
  59. # What are the options?
  60. The only constraint is that your target language needs an FFI (Foreign Function Interface) that allows C to call it. Most awesome languages allow this. I'm going to use Ocaml.
  61. The other material requirements are your language header files (`ruby.h`, `caml/callback.h`, etc.), and perserverence.
  62. ---
  63. # What are the options?
  64. The only constraint is that your target language needs an FFI (Foreign Function Interface) that allows C to call it. Most awesome languages allow this. I'm going to use Ocaml.
  65. The other material requirements are your language header files (`ruby.h`, `caml/callback.h`, etc.), and perserverence.
  66. (You may look warily upon the whole "allows C to call it" thing. This is understandable. Things will get worse before they get better.)
  67. ---
  68. # What's Ocaml?
  69. In brief, it's a statically- and strongly- typed, non-pure, strict, functional programming language. It's from the ML family (which includes SML, F#, and some others).
  70. Ocaml's pretty cool at the moment, because it's one of the fastest languages around, routinely rivalling C++ in the Shootout.
  71. ---
  72. # Matrix Multiplication in Ocaml, round 1
  73. let matrix_mul a b =
  74. List.map
  75. (fun row ->
  76. mapn
  77. (fun column ->
  78. List.fold_left (+) 0
  79. (List.map2 ( * ) row column))
  80. b)
  81. a
  82. (cribbed from RosettaCode)
  83. ---
  84. # Benefits
  85. let matrix_mul a b =
  86. List.map
  87. (fun row ->
  88. mapn
  89. (fun column ->
  90. List.fold_left (+) 0
  91. (List.map2 ( * ) row column))
  92. b)
  93. a
  94. We've acheived a 10x speedup, and our code is not actually much more complicated than the Ruby.
  95. This code uses Lists, which are pretty natural to use in functional languages. Arrays would probably be faster, though.
  96. ---
  97. # Matrix Multiplication in Ocaml, round 2
  98. let matrix_mul x y =
  99. let x0 = Array.length x
  100. and y0 = Array.length y in
  101. let y1 = if y0 = 0 then 0 else Array.length y.(0) in
  102. let z = Array.make_matrix x0 y1 0 in
  103. for i = 0 to x0-1 do
  104. for j = 0 to y1-1 do
  105. for k = 0 to y0-1 do
  106. z.(i).(j) <- z.(i).(j) + x.(i).(k) * y.(k).(j)
  107. done
  108. done
  109. done;
  110. z
  111. (also cribbed from RosettaCode)
  112. ---
  113. # Benefits
  114. let matrix_mul x y =
  115. let x0 = Array.length x
  116. and y0 = Array.length y in
  117. let y1 = if y0 = 0 then 0 else Array.length y.(0) in
  118. let z = Array.make_matrix x0 y1 0 in
  119. for i = 0 to x0-1 do
  120. for j = 0 to y1-1 do
  121. for k = 0 to y0-1 do
  122. z.(i).(j) <- z.(i).(j) + x.(i).(k) * y.(k).(j)
  123. done
  124. done
  125. done;
  126. z
  127. Ocaml Arrays are pretty darn fast, and have a great initialization function tailored to this sort of problem (make_matrix).
  128. We've now managed a speedup around 30-35x. And, again, the code is not that much more complex than the original Ruby.
  129. ---
  130. # Gluing Pieces Together
  131. Of course, the Ocaml code is operating on Ocaml types, and Ruby wants to pass it Ruby types. What to do, what to do?
  132. ---
  133. # It's gonna get worse before it gets better...
  134. Ruby can call C code...
  135. ---
  136. # It's gonna get worse before it gets better...
  137. Ruby can call C code...
  138. Ocaml can be called by C code...
  139. ---
  140. # It's gonna get worse before it gets better...
  141. Ruby can call C code...
  142. Ocaml can be called by C code...
  143. They both have facilities to translate their types into C types (and Ocaml vice-versa)...
  144. ---
  145. # FFI Aside
  146. Many languages provide C type definitions and functions for extensions. There are two problems with this.
  147. 1. Many of these FFIs aren't merely type declarations and functions, but also macros. That means they only work in C.
  148. 2. Even if they were macro-free, few languages have all-purpose transparent C access. Thus, wrapper code is always needed.
  149. ---
  150. # Dancing the Type Two-Step
  151. Let's work our way out, from Ocaml to Ruby, via C. First, we need to tell Ocaml to export a function to C.
  152. let matrix_mul x y =
  153. let x0 = Array.length x
  154. and y0 = Array.length y in
  155. let y1 = if y0 = 0 then 0 else Array.length y.(0) in
  156. let z = Array.make_matrix x0 y1 0 in
  157. for i = 0 to x0-1 do
  158. for j = 0 to y1-1 do
  159. for k = 0 to y0-1 do
  160. z.(i).(j) <- z.(i).(j) + x.(i).(k) * y.(k).(j)
  161. done
  162. done
  163. done;
  164. z
  165. let _ = Callback.register "ocaml_matrix_mul" matrix_mul;;
  166. ---
  167. # Dancing the Type Two-Step
  168. Next, we need some C code to call the Ocaml.
  169. #include "caml/callback.h"
  170. void initcaml(void) {
  171. char *dummy_argv[] = {0};
  172. caml_startup(dummy_argv);
  173. }
  174. First we initialize the Ocaml runtime.
  175. ---
  176. # Dancing the Type Two-Step
  177. int interm_matrix_mul(*a, *b) {
  178. static value *f = NULL;
  179. f = caml_named_value("ocaml_matrix_mul");
  180. if (f == NULL)
  181. return -1;
  182. value r_caml;
  183. r_caml = callback2(*f, a, b);
  184. This is how we grab and execute the exported code. Note that we haven't figured the types for `a` or `b`, nor have we converted r_caml to something C-like. Who's up for C arrays of C arrays of C `int`s? (Or should they be `long`s?)
  185. ---
  186. # Dancing the Type Two-Step
  187. From the Ruby side, we'd do something like this.
  188. #include "ruby.h"
  189. VALUE OcamlMatrix = Qnil; // set up the Ruby namespace
  190. // prototype our init and method
  191. void Init_ocamlmatrix();
  192. VALUE method_matrix_mul(VALUE self, VALUE a, VALUE b);
  193. void Init_ocamlmatrix() {
  194. OcamlMatrix = rb_define_module("OcamlMatrix");
  195. rb_define_method(OcamlMatrix, "matrix_mul", method_matrix_mul, 0);
  196. }
  197. ---
  198. # Dancing the Type Two-Step
  199. VALUE method_matrix_mul(VALUE self, VALUE a, VALUE b) {
  200. // here we would convert a and b to C arrays
  201. c = interm_matrix_mul(conv_a, conv_b);
  202. // and then convert c from C arrays to a Ruby VALUE
  203. return conv_c;
  204. }
  205. ---
  206. # You probably saw this coming
  207. You can convert directly from the Ruby types to Ocaml types without C intermediate types. This is nice, because it avoids any need for structs or pointer arithmetic.
  208. ---
  209. # You probably saw this coming
  210. You can convert directly from the Ruby types to Ocaml types without C intermediate types. This is nice, because it avoids any need for structs or pointer arithmetic.
  211. But... this only works if you can import both languages' headers without name conflicts (thankfully Ruby's VALUE and Ocaml's value are spelled differently).
  212. ---
  213. # From Two-Step to Tango
  214. VALUE method_matrix_mul(VALUE self, VALUE a, VALUE b) {
  215. CAMLparam0 ();
  216. CAMLlocal2 (caml_a, caml_b);
  217. CAMLlocal1 (caml_interm);
  218. CAMLlocal1 (caml_item);
  219. int x = RARRAY(a)->len;
  220. for (int i = 0; i<x; i++) {
  221. VALUE row = rb_ary_entry(a, i); // row = a[i]
  222. int y = RARRAY(row)->len;
  223. caml_interm = caml_alloc(y, 0);
  224. for (int j = 0; j<y; j++) {
  225. VALUE item = rb_ary_entry(row, j);
  226. long c_item = NUM2LONG(item);
  227. caml_item = Val_long(c_item);
  228. Store_field(caml_interm, j, caml_item);
  229. }
  230. }
  231. // do the same for b
  232. caml_c = interm_matrix_mul(caml_a, caml_b);
  233. // and then convert caml_c from value to a Ruby VALUE
  234. CAMLreturnT(VALUE, ruby_c);
  235. }
  236. ---
  237. # Did you spot the problems?
  238. ---
  239. # Hmmm...
  240. ---
  241. # Hmmm...
  242. Complex data is the problem.
  243. ---
  244. # Hmmm...
  245. Complex data is the problem.
  246. What if the data was already in Ocaml types when matrix_mul is called?
  247. ---
  248. # Don't wrap a function, wrap a class
  249. ---
  250. # Tango to Salsa
  251. let new_matrix width height =
  252. Array.make_matrix width height 0
  253. let matrix_assign m x y item =
  254. m.(x).(y) <- item
  255. let matrix_retrieve m x y =
  256. m.(x).(y)
  257. let _ = Callback.register "ocaml_new_matrix" new_matrix
  258. let _ = Callback.register "ocaml_matrix_assign" matrix_assign
  259. let _ = Callback.register "ocaml_matrix_retrieve" matrix_retrieve
  260. let _ = Callback.register "ocaml_matrix_mul" matrix_mul
  261. ---
  262. # Tango to Salsa
  263. void Init_ocamlmatrix() {
  264. OcamlMatrix = rb_define_module("OcamlMatrix");
  265. rb_define_singleton_method(OcamlMatrix, "w_new_matrix", singleton_matrix_new, 2);
  266. rb_define_method(OcamlMatrix, "w_matrix_assign", method_assign, 3);
  267. rb_define_method(OcamlMatrix, "w_matrix_retrieve", method_retrieve, 2);
  268. rb_define_method(OcamlMatrix, "w_matrix_mul", method_matrix_mul, 1);
  269. initcaml();
  270. }
  271. ---
  272. # Tango to Salsa
  273. VALUE singleton_matrix_new(VALUE class, VALUE width, VALUE height) {
  274. CAMLparam0 ();
  275. CAMLlocal1 (ocaml_m);
  276. ocaml_m = ocaml_new_matrix(Val_long(NUM2LONG(width)),
  277. Val_long(NUM2LONG(height)));
  278. VALUE my_m = Data_Wrap_Struct(class, 0, 0, ocaml_m);
  279. rb_obj_call_init(my_m, 0, NULL);
  280. CAMLreturnT (VALUE, my_m);
  281. }
  282. ---
  283. # Tango to Salsa
  284. void method_assign(VALUE self, VALUE x, VALUE y, VALUE item) {
  285. CAMLparam0 ();
  286. CAMLlocal1 (ocaml_m);
  287. Data_Get_Struct(self, value, ocaml_m);
  288. ocaml_matrix_assign(ocaml_m, Val_long(NUM2LONG(x)),
  289. Val_long(NUM2LONG(y)),
  290. Val_long(NUM2LONG(item)));
  291. CAMLreturn0;
  292. }
  293. VALUE method_retrieve(VALUE self, VALUE x, VALUE y) {
  294. CAMLparam0 ();
  295. CAMLlocal1 (ocaml_m);
  296. Data_Get_Struct(self, value, ocaml_m);
  297. VALUE retval = LONG2NUM(Long_val(ocaml_matrix_retrieve(ocaml_m,
  298. Val_long(NUM2LONG(x)),
  299. Val_long(NUM2LONG(y)))
  300. ));
  301. CAMLreturnT (VALUE, retval);
  302. }
  303. ---
  304. # Tango to Salsa
  305. VALUE method_matrix_mul(VALUE self, VALUE b) {
  306. CAMLparam0 ();
  307. CAMLlocal3 (ocaml_m1, ocaml_m2, ocaml_m3);
  308. Data_Get_Struct(self, value, ocaml_m1);
  309. Data_Get_Struct(b, value, ocaml_m2);
  310. ocaml_m3 = ocaml_matrix_mul(ocaml_m1, ocaml_m2);
  311. VALUE my_m = Data_Wrap_Struct(class, 0, 0, ocaml_m);
  312. rb_obj_call_init(my_m, 0, NULL);
  313. CAMLreturnT (VALUE, my_m);
  314. }
  315. ---
  316. # Add a Ruby slipper
  317. ---
  318. # No Silver Bullets
  319. But we have a couple bronze ones.
  320. ---
  321. # Perl
  322. Inline:: libraries on CPAN.
  323. ---
  324. # Perl
  325. use Inline "Befunge";
  326. hello();
  327. __END__
  328. __Befunge__
  329. ;:hello - print a msg;<q_,#! #:<"Hello world!"a
  330. ;
  331. :
  332. a q - ;tcartsbus:;
  333. d
  334. d
  335. ;
  336. + ;not a valid func def;
  337. q
  338. ---
  339. # Python
  340. PyCaml, PyObjC.
  341. ---
  342. # Python Calling Python
  343. SciPy, NumPy.
  344. Cython.
  345. ---
  346. # Ruby
  347. Well after i decided on using Ruby and using Ocaml...
  348. ---
  349. # Ruby
  350. Well after i decided on using Ruby and using Ocaml...
  351. ... i found [Rocaml](http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/256569).
  352. ---
  353. # Ruby
  354. Well after i decided on using Ruby and using Ocaml...
  355. ... i found [Rocaml](http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/256569).
  356. Also available is the Ruby clone of Perl's Inline::, including InlineFortran.
  357. ---
  358. # Phew
  359. ---
  360. # Phew
  361. Any questions?
  362. ---
  363. # Thanks
  364. Thanks for coming. Thanks OSBridge for putting together an awesome conference.
  365. ---