gcc-4.9/gcc/config/arm/neon-docgen.ml


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424

(* ARM NEON documentation generator.

   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.

   This is an O'Caml program.  The O'Caml compiler is available from:

     http://caml.inria.fr/

   Or from your favourite OS's friendly packaging system. Tested with version
   3.09.2, though other versions will probably work too.

   Compile with:
     ocamlc -c neon.ml
     ocamlc -o neon-docgen neon.cmo neon-docgen.ml

   Run with:
     /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
*)

open Neon

(* Every table row the generator knows about: the "reinterp" rows first,
   followed by the "ops" rows.  *)
let ops_reinterp = List.append reinterp ops

(* Helper functions for extracting things from the "ops" table.  *)
(* Select the rows of the combined table whose opcode equals
   DESIRED_OPCODE.  The selected rows are returned in the reverse of
   their table order.  The trailing unit argument postpones the scan
   until the caller applies it.  *)
let single_opcode desired_opcode () =
  let wanted (opcode, _, _, _, _, _) = (opcode = desired_opcode) in
    List.rev (List.filter wanted ops_reinterp)

(* Select the rows for each opcode listed in DESIRED_OPCODES.  The batch
   of rows for a later opcode in the list is placed ahead of the batches
   for the opcodes that precede it.  *)
let multiple_opcodes desired_opcodes () =
  let rows_for desired_opcode = single_opcode desired_opcode () in
    List.concat (List.rev_map rows_for desired_opcodes)

(* Select the rows whose opcode is a Vldx, Vldx_lane or Vldx_dup carrying
   NUMBER.  Rows are returned in the reverse of their table order.  *)
let ldx_opcode number () =
  let is_wanted_load (opcode, _, _, _, _, _) =
    match opcode with
      Vldx n | Vldx_lane n | Vldx_dup n -> n = number
    | _ -> false
  in
    List.rev (List.filter is_wanted_load ops_reinterp)

(* Select the rows whose opcode is a Vstx or Vstx_lane carrying NUMBER.
   Rows are returned in the reverse of their table order.  *)
let stx_opcode number () =
  let is_wanted_store (opcode, _, _, _, _, _) =
    match opcode with
      Vstx n | Vstx_lane n -> n = number
    | _ -> false
  in
    List.rev (List.filter is_wanted_store ops_reinterp)

(* Select every row whose opcode is a Vtbl, whatever its argument.  Rows
   are returned in the reverse of their table order.  *)
let tbl_opcode () =
  let is_tbl (opcode, _, _, _, _, _) =
    match opcode with
      Vtbl _ -> true
    | _ -> false
  in
    List.rev (List.filter is_tbl ops_reinterp)

(* Select every row whose opcode is a Vtbx, whatever its argument.  Rows
   are returned in the reverse of their table order.  *)
let tbx_opcode () =
  let is_tbx (opcode, _, _, _, _, _) =
    match opcode with
      Vtbx _ -> true
    | _ -> false
  in
    List.rev (List.filter is_tbx ops_reinterp)

(* The groups of intrinsics.  Each entry pairs the title of a group with
   an extractor: a function that, once applied to (), yields the table
   rows belonging to that group.  The titles are emitted verbatim into
   the generated texinfo, so they must read as plain English; three of
   them used to contain a stray "single_opcode" left behind by a
   search-and-replace.  *)
let intrinsic_groups =
  [ "Addition", single_opcode Vadd;
    "Multiplication", single_opcode Vmul;
    "Multiply-accumulate", single_opcode Vmla;
    "Multiply-subtract", single_opcode Vmls;
    "Fused-multiply-accumulate", single_opcode Vfma;
    "Fused-multiply-subtract", single_opcode Vfms;
    "Round to integral (to nearest, ties to even)", single_opcode Vrintn;
    "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta;
    "Round to integral (towards +Inf)", single_opcode Vrintp;
    "Round to integral (towards -Inf)", single_opcode Vrintm;
    "Round to integral (towards 0)", single_opcode Vrintz;
    "Subtraction", single_opcode Vsub;
    "Comparison (equal-to)", single_opcode Vceq;
    "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
    "Comparison (less-than-or-equal-to)", single_opcode Vcle;
    "Comparison (greater-than)", single_opcode Vcgt;
    "Comparison (less-than)", single_opcode Vclt;
    "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
    "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
    "Comparison (absolute greater-than)", single_opcode Vcagt;
    "Comparison (absolute less-than)", single_opcode Vcalt;
    "Test bits", single_opcode Vtst;
    "Absolute difference", single_opcode Vabd;
    "Absolute difference and accumulate", single_opcode Vaba;
    "Maximum", single_opcode Vmax;
    "Minimum", single_opcode Vmin;
    "Pairwise add", single_opcode Vpadd;
    "Pairwise add, widen and accumulate", single_opcode Vpada;
    "Folding maximum", single_opcode Vpmax;
    "Folding minimum", single_opcode Vpmin;
    "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
    "Vector shift left", single_opcode Vshl;
    "Vector shift left by constant", single_opcode Vshl_n;
    "Vector shift right by constant", single_opcode Vshr_n;
    "Vector shift right by constant and accumulate", single_opcode Vsra_n;
    "Vector shift right and insert", single_opcode Vsri;
    "Vector shift left and insert", single_opcode Vsli;
    "Absolute value", single_opcode Vabs;
    "Negation", single_opcode Vneg;
    "Bitwise not", single_opcode Vmvn;
    "Count leading sign bits", single_opcode Vcls;
    "Count leading zeros", single_opcode Vclz;
    "Count number of set bits", single_opcode Vcnt;
    "Reciprocal estimate", single_opcode Vrecpe;
    "Reciprocal square-root estimate", single_opcode Vrsqrte;
    "Get lanes from a vector", single_opcode Vget_lane;
    "Set lanes in a vector", single_opcode Vset_lane;
    "Create vector from literal bit pattern", single_opcode Vcreate;
    "Set all lanes to the same value",
      multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
    "Combining vectors", single_opcode Vcombine;
    "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
    "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
    "Move, narrowing", single_opcode Vmovn;
    "Move, long", single_opcode Vmovl;
    "Table lookup", tbl_opcode;
    "Extended table lookup", tbx_opcode;
    "Multiply, lane", single_opcode Vmul_lane;
    "Long multiply, lane", single_opcode Vmull_lane;
    "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
    "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
    "Multiply-accumulate, lane", single_opcode Vmla_lane;
    "Multiply-subtract, lane", single_opcode Vmls_lane;
    "Vector multiply by scalar", single_opcode Vmul_n;
    "Vector long multiply by scalar", single_opcode Vmull_n;
    "Vector saturating doubling long multiply by scalar",
      single_opcode Vqdmull_n;
    "Vector saturating doubling multiply high by scalar",
      single_opcode Vqdmulh_n;
    "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
    "Vector multiply-subtract by scalar", single_opcode Vmls_n;
    "Vector extract", single_opcode Vext;
    "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
    "Bit selection", single_opcode Vbsl;
    "Transpose elements", single_opcode Vtrn;
    "Zip elements", single_opcode Vzip;
    "Unzip elements", single_opcode Vuzp;
    "Element/structure loads, VLD1 variants", ldx_opcode 1;
    "Element/structure stores, VST1 variants", stx_opcode 1;
    "Element/structure loads, VLD2 variants", ldx_opcode 2;
    "Element/structure stores, VST2 variants", stx_opcode 2;
    "Element/structure loads, VLD3 variants", ldx_opcode 3;
    "Element/structure stores, VST3 variants", stx_opcode 3;
    "Element/structure loads, VLD4 variants", ldx_opcode 4;
    "Element/structure stores, VST4 variants", stx_opcode 4;
    "Logical operations (AND)", single_opcode Vand;
    "Logical operations (OR)", single_opcode Vorr;
    "Logical operations (exclusive OR)", single_opcode Veor;
    "Logical operations (AND-NOT)", single_opcode Vbic;
    "Logical operations (OR-NOT)", single_opcode Vorn;
    "Reinterpret casts", single_opcode Vreinterp ]

(* Render SHAPE, the operand shape of an intrinsic, as the texinfo text
   describing the operands of the corresponding instruction.  *)
let rec analyze_shape shape =
  (* A list holding COUNT copies of ITEM.  *)
  let replicate count item =
    let rec grow remaining acc =
      match remaining with
        0 -> acc
      | _ -> grow (remaining - 1) (item :: acc)
    in
      grow count []
  in
  (* Texinfo text for a single operand.  REG_NO is the number printed
     after the register letter.  *)
  let rec operand_text reg_no elt =
    let numbered letter = "@var{" ^ letter ^ (string_of_int reg_no) ^ "}" in
    match elt with
      Dreg -> numbered "d"
    | Qreg -> numbered "q"
    | Corereg -> numbered "r"
    | Immed -> "#@var{0}"
    | VecArray (1, inner) ->
        "@{" ^ (operand_text 0 inner) ^ "@}"
    | VecArray (n, inner) ->
        (* Members are numbered 0 .. n-1, lowest first.  *)
        let rec collect i acc =
          if i = 0 then acc
          else collect (i - 1) ((operand_text (i - 1) inner) :: acc)
        in
          "@{" ^ (commas (fun x -> x) (collect n []) "") ^ "@}"
    | (PtrTo inner | CstPtrTo inner) ->
        "[" ^ (operand_text reg_no inner) ^ "]"
    | Element_of_dreg -> (numbered "d") ^ "[@var{0}]"
    | Element_of_qreg -> (numbered "q") ^ "[@var{0}]"
    | All_elements_of_dreg -> (numbered "d") ^ "[]"
    | Alternatives alts ->
        (* Only the first alternative is documented.  *)
        operand_text reg_no (List.hd alts)
  in
  (* Operand lists in which every operand is given register number 0.  *)
  let first_regs elts = commas (operand_text 0) elts "" in
  let join pieces = String.concat ", " pieces in
    match shape with
      All (n, elt) -> first_regs (replicate n elt)
    | Long ->
        join [operand_text 0 Qreg; operand_text 0 Dreg; operand_text 0 Dreg]
    | Long_noreg elt -> join [operand_text 0 elt; operand_text 0 elt]
    | Wide ->
        join [operand_text 0 Qreg; operand_text 0 Qreg; operand_text 0 Dreg]
    | Wide_noreg elt -> analyze_shape (Long_noreg elt)
    | Narrow ->
        join [operand_text 0 Dreg; operand_text 0 Qreg; operand_text 0 Qreg]
    | Use_operands elts -> first_regs (Array.to_list elts)
    | By_scalar Dreg -> first_regs [Dreg; Dreg; Element_of_dreg]
    | By_scalar Qreg -> first_regs [Qreg; Qreg; Element_of_dreg]
    | By_scalar _ -> assert false
    | Wide_lane -> first_regs [Qreg; Dreg; Element_of_dreg]
    | Wide_scalar -> first_regs [Qreg; Dreg; Element_of_dreg]
    | Pair_result elt -> join [operand_text 0 elt; operand_text 1 elt]
    | Unary_scalar _ -> "FIXME Unary_scalar"
    | Binary_imm elt -> first_regs [elt; elt; Immed]
    | Narrow_imm -> first_regs [Dreg; Qreg; Immed]
    | Long_imm -> first_regs [Qreg; Dreg; Immed]

(* Write the texinfo entry for one intrinsic to CHAN.  The entry is an
   @itemize block holding the C prototype and, unless the intrinsic's
   features include No_op, the form of the instruction(s) it is expected
   to produce.  FIRST chooses between "@item" and "@itemx" for the
   prototype line.  *)
let describe_intrinsic first chan
                       (elt_ty, (_, features, shape, name, munge, _)) =
  let c_arity, new_elt_ty = munge shape elt_ty in
  let c_types = strings_of_arity c_arity in
  let emit text = output_string chan text in
  (* Print one instruction: its mnemonic, with an element-type suffix
     unless there are no elements, followed by its operands.  *)
  let print_one_insn insn_name =
    emit "@code{";
    let mnemonic =
      match new_elt_ty with
        NoElts -> insn_name
      | _ -> insn_name ^ "." ^ (string_of_elt_dots new_elt_ty)
    in
    let operand_forms = analyze_all_shapes features shape analyze_shape in
      (* If the intrinsic expands to multiple instructions, we assume
         they are all of the same form, so only the first is shown.  *)
      emit (mnemonic ^ " " ^ (List.hd operand_forms) ^ "}")
  in
  (* Print every instruction name, separated by an emphasised "or".  *)
  let rec print_insns = function
      [] -> ()
    | insn_name :: rest ->
        print_one_insn insn_name;
        if rest <> [] then begin
          emit " @emph{or} ";
          print_insns rest
        end
  in
    emit "@itemize @bullet\n";
    (* The head of C_TYPES is the return type; the tail lists the
       argument types.  *)
    emit ((if first then "@item" else "@itemx") ^ " " ^ (List.hd c_types)
          ^ " " ^ (intrinsic_name name) ^ "_" ^ (string_of_elt elt_ty)
          ^ " (");
    emit ((commas (fun ty -> ty) (List.tl c_types) "") ^ ")\n");
    if List.for_all (fun feature -> feature <> No_op) features then
    begin
      let insn_names = get_insn_names features name in
        emit "@*@emph{Form of expected instruction(s):} ";
        print_insns insn_names;
        emit "\n"
    end;
    emit "@end itemize\n";
    emit "\n\n"

(* Write one group of intrinsics to CHAN: a @subsubsection heading made
   from GROUP_TITLE, then one entry for every (element type, row)
   combination that GROUP_EXTRACTOR's rows give rise to.  *)
let document_group chan (group_title, group_extractor) =
  (* Put one (element type, row) pair per element type of ROW in front
     of ACC; within a row, the last element type ends up first.  *)
  let pair_with_types acc ((_, _, _, _, _, elt_tys) as row) =
    List.rev_append (List.map (fun elt_ty -> (elt_ty, row)) elt_tys) acc
  in
  let intrinsics = List.fold_left pair_with_types [] (group_extractor ()) in
    (* Heading first, then the entries, then the blank lines that close
       the group.  *)
    output_string chan ("@subsubsection " ^ group_title ^ "\n\n");
    List.iter (describe_intrinsic true chan) intrinsics;
    output_string chan "\n\n"

(* Write the fixed texinfo comment header (copyright notice and the
   "generated file" warning) to CHAN, one line at a time.  *)
let gnu_header chan =
  let emit_line text =
    output_string chan text;
    output_char chan '\n'
  in
    List.iter emit_line
      [ "@c Copyright (C) 2006-2014 Free Software Foundation, Inc.";
        "@c This is part of the GCC manual.";
        "@c For copying conditions, see the file gcc.texi.";
        "";
        "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
        "@c Please do not edit manually." ]

(* Hand-written texinfo for the crypto intrinsics; the program entry
   point writes it to the output file verbatim, after the generated
   groups.  The vsha1su1q_u32 entry shows the two-register form of
   SHA1SU1: the intrinsic takes two vectors and the destination register
   doubles as the first source, exactly as for SHA256SU0 below.  *)
let crypto_doc =
"
@itemize @bullet
@item poly128_t vldrq_p128(poly128_t const *)
@end itemize

@itemize @bullet
@item void vstrq_p128(poly128_t *, poly128_t)
@end itemize

@itemize @bullet
@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
@end itemize

@itemize @bullet
@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
@end itemize

@itemize @bullet
@item uint32_t vsha1h_u32 (uint32_t)
@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize
 
@itemize @bullet
@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize
 
@itemize @bullet
@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}}
@end itemize
 
@itemize @bullet
@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item poly128_t vmull_p64 (poly64_t a, poly64_t b)
@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
@end itemize

@itemize @bullet
@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b)
@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
@end itemize
"

(* Program entry point.  Expects exactly one command-line argument, the
   name of the texinfo file to write, and fails with a usage message
   otherwise.  Opening the file is kept apart from writing to it so that
   each kind of failure gets an accurate message, and so that the
   channel is closed again if anything goes wrong part-way through.  *)
let () =
  if Array.length Sys.argv <> 2 then
    failwith "Usage: neon-docgen <output filename>"
  else
    let file = Sys.argv.(1) in
    let chan =
      try open_out file
      with Sys_error sys ->
        failwith ("Could not create output file " ^ file ^ ": " ^ sys)
    in
      try
        gnu_header chan;
        List.iter (document_group chan) intrinsic_groups;
        Printf.fprintf chan "%s\n" crypto_doc;
        close_out chan
      with
        Sys_error sys ->
          (* The file was created, so this is a write (or flush) error.  *)
          close_out_noerr chan;
          failwith ("Could not write output file " ^ file ^ ": " ^ sys)
      | exn ->
          (* Do not leak the channel on any other failure.  *)
          close_out_noerr chan;
          raise exn