#!/bin/perl

#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is Mozilla MathML Project.
#
# The Initial Developer of the Original Code is The University Of
# Queensland.  Portions created by The University Of Queensland are
# Copyright (C) 1999 The University Of Queensland.  All
# Rights Reserved.
#
# Contributor(s):
#   Roger B. Sidje <rbs@maths.uq.edu.au>
#

# Purpose:
#   This script produces data for the operator dictionary
#   RBS - Aug 28, 1999.

# Output: the file $operator_file contains the dictionary in suitable
# format for inclusion with C++ macros.

# $operator_file = '..\content\src\nsMathMLOperatorList.h';
$operator_file = 'operator.list';
$unicode_file = "byalpha.txt";

# Load the entity <-> unicode tables.
# $unicode_file = "byalpha.txt" or "bycodes.txt"
# byalpha.txt, bycodes.txt are simply the Save as text of
#   http://www.w3.org/TR/REC-MathML/chap6/byalpha.html
#   http://www.w3.org/TR/REC-MathML/chap6/bycodes.html
getUnicode($unicode_file);

# Get the MathML Operators - exact *copy-paste* from
#   http://www.w3.org/TR/REC-MathML/appendixC.html
getMathMLOperators();


print "\n***** Saving into the file *****  $operator_file  *****\n";

# Three-argument open with a lexical handle: the file name is never parsed
# for mode characters, and the OS error is reported when the open fails.
open(my $output, '>', $operator_file)
  or die("can't open $operator_file: $!");

print {$output} <<HEADER_DATA;
/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Mozilla MathML Project.
 *
 * The Initial Developer of the Original Code is The University Of
 * Queensland.  Portions created by The University Of Queensland are
 * Copyright (C) 1999 The University Of Queensland.  All Rights Reserved.
 *
 * Contributor(s):
 *   Roger B. Sidje <rbs\@maths.uq.edu.au>
 */

/* MathML Operator Dictionary - Auto-generated by operator.pl. Do not edit! */

/* FORMAT
MATHML_OPERATOR(_rank,
                _operator,= string value of the operator
                _flags,   = bitwise: movablelimits|separator|largeop|accent|fence|stretchy|form
                _lspace,  = leftspace in em
                _rspace)  = rightspace in em
*/

HEADER_DATA


# There are three global variables available here:
# $MACRO_LIST_UNICHAR  - list of unicode that make the operator names
# $MACRO_LIST_UNIDATA  - dictionary with the name of the operator as unicode
# $MACRO_LIST_ENTDATA  - dictionary with the name of the operator as entity
#
# Only $MACRO_LIST_UNICHAR and $MACRO_LIST_ENTDATA are written out below.
# NOTE(review): $count is interpolated as the operator count; presumably
# getMathMLOperators() resets it after getUnicode() used it - confirm.

print {$output} <<CONTENT;
#ifdef WANT_MATHML_OPERATOR_COUNT
#define NS_MATHML_OPERATOR_COUNT $count
#else
#ifdef WANT_MATHML_OPERATOR_UNICHAR
//Unicode(s),\\0// group symbol form
$MACRO_LIST_UNICHAR
#else
$MACRO_LIST_ENTDATA
#endif
#endif
CONTENT

# Buffered write errors (disk full, ...) only surface at close time, so a
# failed close means the generated file is not trustworthy.
close($output)
  or die("can't close $operator_file: $!");

print "Done $count operators.\n";

exit(0);

################################






# All outputs are global variables ...

#
# extract all the symbols of the MathML REC byalpha.txt or bycodes.txt (the
# name of the file is passed as argument)
#  INPUT: "byalpha.txt" or "bycodes.txt"
# OUTPUT: - hash array %UNICODE such that $UNICODE{$entity} = $unicode
#         - array @ENTITY such that $ENTITY[$i] is an entity name
#         - hash array %ENTITY_LAST_ALIAS such that
#           $ENTITY_LAST_ALIAS{$unicode} = last entity with that unicode
sub getUnicode {
  # Scan a text dump of the MathML REC entity tables and fill the global
  # lookup tables.
  #
  #  INPUT: $infile - path of the file to scan. If the path contains
  #                   "byalpha", lines are parsed in the byalpha layout
  #                   (entity first, aliases included); otherwise in the
  #                   bycodes layout (unicode first, no aliases).
  # OUTPUT: (all globals)
  #         %UNICODE            - $UNICODE{$entity} = $unicode
  #         %ENTITY_LAST_ALIAS  - $ENTITY_LAST_ALIAS{$unicode} = last entity
  #                               seen with that unicode
  #         @ENTITY             - the keys of %UNICODE
  #         $count              - number of mappings recorded by this call
  # Dies if $infile cannot be opened.
  my ($infile) = @_;

  # NOTE(review): $byalpha is left as a package global, as it always was;
  # it is not visible here whether later code in the script reads it.
  $byalpha = $infile =~ /byalpha/;

  print "\nScanning $infile ...";

  # Three-argument open: the file name is taken literally (no mode
  # characters, no whitespace stripping), and $! explains a failure.
  open(my $in, '<', $infile)
    or die("Can't open $infile: $!");

  $count = 0;

  # Read into a lexical so the caller's $_ is not clobbered.
  while (my $line = <$in>) {
    my ($entity, $unicode);

    if ($byalpha) { # byalpha -- ALIASES ARE INCLUDED
      #pattern byalpha: entity                   isolat2       377 unicode =capital Z, acute accent
      next unless $line =~ /^([a-z\.]\S+)\s+\S+\s+\d+\s+(\S+)\s+.*/i;
      ($entity, $unicode) = ($1, $2);

      if ($UNICODE{$entity}) { #conflicting mapping ?
        # Same mapping seen again: nothing new to record or count.
        next if $UNICODE{$entity} eq $unicode;
        # Different mapping: warn, then let the newer one win below.
        print "\nWARNING! Found: $entity -> $unicode <> $UNICODE{$entity}";
      }
    }
    else { # bycodes  -- ALIASES ARE NOT INCLUDED
      #pattern bycode: unicode      9  entity     mmlextra    tabulator stop; horizontal tabulation
      next unless $line =~ /^(\S+)\s+\d+\s+([a-z\.]+)\s+\S+\s+.*/i;
      ($unicode, $entity) = ($1, $2);
    }

    $UNICODE{$entity} = $unicode;
    $ENTITY_LAST_ALIAS{$unicode} = $entity;
    ++$count;
  }

  # The original never released the handle.
  close($in);

  @ENTITY = (keys %UNICODE);
  print "\nFound: $count unicode points, " . scalar(@ENTITY) . " entities\n";
}


#Make the MathML Operator dictionary
# INPUT:
# OUTPUT:
#   $MACRO_LIST_UNICHAR  - list of unicode that make the operator names
#   $MACRO_LIST_UNIDATA  - dictionary with the name of the operator as unicode
#   $MACRO_LIST_ENTDATA  - dictionary with the name of the operator as entity

sub getMathMLOperators {
	
#Operator attributes:
#                        values                  <-- default -->
#  form           prefix |infix | postfix        determined by position in mrow
#  fence          true | false                   set by dictionary
#  accent         true | false                   set by dictionary
#  lspace         number h-unit                  set by dictionary
#  rspace         number h-unit                  set by dictionary
#  largeop        true | false                   set by dictionary
#  stretchy       true | false                   set by dictionary
#  separator      true | false                   set by dictionary
#  movablelimits  true | false                   set by dictionary
#  symmetric      true | false                   true
#  minsize  number [h-unit | v-unit]             0
#  maxsize  number [h-unit | v-unit] | infinity  infinity

$DATA = <<MathMLOperatorDictionary;
"("                                  form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
")"                                  form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"["                                  form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"]"                                  form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"{"                                  form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"}"                                  form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&CloseCurlyDoubleQuote;"            form="postfix" fence="true"  lspace="0em" rspace="0em"
"&CloseCurlyQuote;"                  form="postfix" fence="true"  lspace="0em" rspace="0em"
"&LeftAngleBracket;"                 form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&LeftBracketingBar;"                form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&LeftCeiling;"                      form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&LeftDoubleBracket;"                form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&LeftDoubleBracketingBar;"          form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&LeftFloor;"                        form="prefix"  fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&OpenCurlyDoubleQuote;"             form="prefix"  fence="true"  lspace="0em" rspace="0em"
"&OpenCurlyQuote;"                   form="prefix"  fence="true"  lspace="0em" rspace="0em"
"&RightAngleBracket;"                form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&RightBracketingBar;"               form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&RightCeiling;"                     form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&RightDoubleBracket;"               form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&RightDoubleBracketingBar;"         form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&RightFloor;"                       form="postfix" fence="true" stretchy="true"  lspace="0em" rspace="0em"
"&LeftSkeleton;"                     form="prefix"  fence="true"  lspace="0em" rspace="0em"
"&RightSkeleton;"                    form="postfix" fence="true"  lspace="0em" rspace="0em"

"&InvisibleComma;"                   form="infix"   separator="true"  lspace="0em" rspace="0em"

","                                  form="infix"   separator="true"  lspace="0em" rspace=".33333em"

"&HorizontalLine;"                   form="infix"   stretchy="true" minsize="0"  lspace="0em" rspace="0em"
"&VerticalLine;"                     form="infix"   stretchy="true" minsize="0"  lspace="0em" rspace="0em"

";"                                  form="infix"   separator="true"  lspace="0em" rspace=".27777em"
";"                                  form="postfix" separator="true"  lspace="0em" rspace="0em"

":="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"&Assign;"                           form="infix"    lspace=".27777em" rspace=".27777em"

"&Because;"                          form="infix"    lspace=".27777em" rspace=".27777em"
"&Therefore;"                        form="infix"    lspace=".27777em" rspace=".27777em"

"&VerticalSeparator;"                form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"

"//"                                 form="infix"    lspace=".27777em" rspace=".27777em"

"&Colon;"                            form="infix"    lspace=".27777em" rspace=".27777em"

"&amp;"                              form="prefix"   lspace="0em" rspace=".27777em"
"&amp;"                              form="postfix"  lspace=".27777em" rspace="0em"

"*="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"-="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"+="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"/="                                 form="infix"    lspace=".27777em" rspace=".27777em"

"-&gt;"                                 form="infix"    lspace=".27777em" rspace=".27777em"

":"                                  form="infix"    lspace=".27777em" rspace=".27777em"

".."                                 form="postfix"  lspace=".22222em" rspace="0em"
"..."                                form="postfix"  lspace=".22222em" rspace="0em"

"&SuchThat;"                         form="infix"    lspace=".27777em" rspace=".27777em"

"&DoubleLeftTee;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&DoubleRightTee;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&DownTee;"                          form="infix"    lspace=".27777em" rspace=".27777em"
"&LeftTee;"                          form="infix"    lspace=".27777em" rspace=".27777em"
"&RightTee;"                         form="infix"    lspace=".27777em" rspace=".27777em"

"&Implies;"                          form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RoundImplies;"                     form="infix"    lspace=".27777em" rspace=".27777em"

"|"                                  form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"||"                                 form="infix"    lspace=".22222em" rspace=".22222em"
"&Or;"                               form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"

"&amp;&amp;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&And;"                              form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"

"&amp;"                              form="infix"    lspace=".27777em" rspace=".27777em"

"!"                                  form="prefix"   lspace="0em" rspace=".27777em"
"&Not;"                              form="prefix"   lspace="0em" rspace=".27777em"

"&Exists;"                           form="prefix"   lspace="0em" rspace=".27777em"
"&ForAll;"                           form="prefix"   lspace="0em" rspace=".27777em"
"&NotExists;"                        form="prefix"   lspace="0em" rspace=".27777em"

"&Element;"                          form="infix"    lspace=".27777em" rspace=".27777em"
"&NotElement;"                       form="infix"    lspace=".27777em" rspace=".27777em"
"&NotReverseElement;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSquareSubset;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSquareSubsetEqual;"             form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSquareSuperset;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSquareSupersetEqual;"           form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSubset;"                        form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSubsetEqual;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSuperset;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSupersetEqual;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&ReverseElement;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&SquareSubset;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&SquareSubsetEqual;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&SquareSuperset;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&SquareSupersetEqual;"              form="infix"    lspace=".27777em" rspace=".27777em"
"&Subset;"                           form="infix"    lspace=".27777em" rspace=".27777em"
"&SubsetEqual;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&Superset;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&SupersetEqual;"                    form="infix"    lspace=".27777em" rspace=".27777em"

"&DoubleLeftArrow;"                  form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DoubleLeftRightArrow;"             form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DoubleRightArrow;"                 form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownLeftRightVector;"              form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownLeftTeeVector;"                form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownLeftVector;"                   form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownLeftVectorBar;"                form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownRightTeeVector;"               form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownRightVector;"                  form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&DownRightVectorBar;"               form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftArrow;"                        form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftArrowBar;"                     form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftArrowRightArrow;"              form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftRightArrow;"                   form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftRightVector;"                  form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftTeeArrow;"                     form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftTeeVector;"                    form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftVector;"                       form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LeftVectorBar;"                    form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LowerLeftArrow;"                   form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&LowerRightArrow;"                  form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightArrow;"                       form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightArrowBar;"                    form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightArrowLeftArrow;"              form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightTeeArrow;"                    form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightTeeVector;"                   form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightVector;"                      form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightVectorBar;"                   form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&ShortLeftArrow;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&ShortRightArrow;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&UpperLeftArrow;"                   form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&UpperRightArrow;"                  form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"

"="                                  form="infix"    lspace=".27777em" rspace=".27777em"
"&lt;"                               form="infix"    lspace=".27777em" rspace=".27777em"
"&gt;"                                  form="infix"    lspace=".27777em" rspace=".27777em"
"!="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"=="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"&lt;="                              form="infix"    lspace=".27777em" rspace=".27777em"
"&gt;="                                 form="infix"    lspace=".27777em" rspace=".27777em"
"&Congruent;"                        form="infix"    lspace=".27777em" rspace=".27777em"
"&CupCap;"                           form="infix"    lspace=".27777em" rspace=".27777em"
"&DotEqual;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&DoubleVerticalBar;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&Equal;"                            form="infix"    lspace=".27777em" rspace=".27777em"
"&EqualTilde;"                       form="infix"    lspace=".27777em" rspace=".27777em"
"&Equilibrium;"                      form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&GreaterEqual;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&GreaterEqualLess;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&GreaterFullEqual;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&GreaterGreater;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&GreaterLess;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&GreaterSlantEqual;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&GreaterTilde;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&HumpDownHump;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&HumpEqual;"                        form="infix"    lspace=".27777em" rspace=".27777em"
"&LeftTriangle;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&LeftTriangleBar;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&LeftTriangleEqual;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&le;"                               form="infix"    lspace=".27777em" rspace=".27777em"
"&LessEqualGreater;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&LessFullEqual;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&LessGreater;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&LessLess;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&LessSlantEqual;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&LessTilde;"                        form="infix"    lspace=".27777em" rspace=".27777em"
"&NestedGreaterGreater;"             form="infix"    lspace=".27777em" rspace=".27777em"
"&NestedLessLess;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&NotCongruent;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&NotCupCap;"                        form="infix"    lspace=".27777em" rspace=".27777em"
"&NotDoubleVerticalBar;"             form="infix"    lspace=".27777em" rspace=".27777em"
"&NotEqual;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&NotEqualTilde;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreater;"                       form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreaterEqual;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreaterFullEqual;"              form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreaterGreater;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreaterLess;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreaterSlantEqual;"             form="infix"    lspace=".27777em" rspace=".27777em"
"&NotGreaterTilde;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&NotHumpDownHump;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&NotHumpEqual;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLeftTriangle;"                  form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLeftTriangleBar;"               form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLeftTriangleEqual;"             form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLess;"                          form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLessEqual;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLessFullEqual;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLessGreater;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLessLess;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLessSlantEqual;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&NotLessTilde;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&NotNestedGreaterGreater;"          form="infix"    lspace=".27777em" rspace=".27777em"
"&NotNestedLessLess;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&NotPrecedes;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&NotPrecedesEqual;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&NotPrecedesSlantEqual;"            form="infix"    lspace=".27777em" rspace=".27777em"
"&NotPrecedesTilde;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&NotRightTriangle;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&NotRightTriangleBar;"              form="infix"    lspace=".27777em" rspace=".27777em"
"&NotRightTriangleEqual;"            form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSucceeds;"                      form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSucceedsEqual;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSucceedsSlantEqual;"            form="infix"    lspace=".27777em" rspace=".27777em"
"&NotSucceedsTilde;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&NotTilde;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&NotTildeEqual;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&NotTildeFullEqual;"                form="infix"    lspace=".27777em" rspace=".27777em"
"&NotTildeTilde;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&NotVerticalBar;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&Precedes;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&PrecedesEqual;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&PrecedesSlantEqual;"               form="infix"    lspace=".27777em" rspace=".27777em"
"&PrecedesTilde;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&Proportion;"                       form="infix"    lspace=".27777em" rspace=".27777em"
"&Proportional;"                     form="infix"    lspace=".27777em" rspace=".27777em"
"&ReverseEquilibrium;"               form="infix"   stretchy="true"  lspace=".27777em" rspace=".27777em"
"&RightTriangle;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&RightTriangleBar;"                 form="infix"    lspace=".27777em" rspace=".27777em"
"&RightTriangleEqual;"               form="infix"    lspace=".27777em" rspace=".27777em"
"&Succeeds;"                         form="infix"    lspace=".27777em" rspace=".27777em"
"&SucceedsEqual;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&SucceedsSlantEqual;"               form="infix"    lspace=".27777em" rspace=".27777em"
"&SucceedsTilde;"                    form="infix"    lspace=".27777em" rspace=".27777em"
"&Tilde;"                            form="infix"    lspace=".27777em" rspace=".27777em"
"&TildeEqual;"                       form="infix"    lspace=".27777em" rspace=".27777em"
"&TildeFullEqual;"                   form="infix"    lspace=".27777em" rspace=".27777em"
"&TildeTilde;"                       form="infix"    lspace=".27777em" rspace=".27777em"
"&UpTee;"                            form="infix"    lspace=".27777em" rspace=".27777em"
"&VerticalBar;"                      form="infix"    lspace=".27777em" rspace=".27777em"

"&SquareUnion;"                      form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"
"&Union;"                            form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"
"&UnionPlus;"                        form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"

"-"                                  form="infix"    lspace=".22222em" rspace=".22222em"
"+"                                  form="infix"    lspace=".22222em" rspace=".22222em"
"&Intersection;"                     form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"
"&MinusPlus;"                        form="infix"    lspace=".22222em" rspace=".22222em"
"&PlusMinus;"                        form="infix"    lspace=".22222em" rspace=".22222em"
"&SquareIntersection;"               form="infix"   stretchy="true"  lspace=".22222em" rspace=".22222em"

"&Vee;"                              form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"&CircleMinus;"                      form="prefix"  largeop="true" movablelimits="true"  lspace="0em" rspace=".16666em"
"&CirclePlus;"                       form="prefix"  largeop="true" movablelimits="true"  lspace="0em" rspace=".16666em"
"&Sum;"                              form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"&Union;"                            form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"&UnionPlus;"                        form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"lim"                                form="prefix"  movablelimits="true"  lspace="0em" rspace=".16666em"
"max"                                form="prefix"  movablelimits="true"  lspace="0em" rspace=".16666em"
"min"                                form="prefix"  movablelimits="true"  lspace="0em" rspace=".16666em"

"&CircleMinus;"                      form="infix"    lspace=".16666em" rspace=".16666em"
"&CirclePlus;"                       form="infix"    lspace=".16666em" rspace=".16666em"

"&ClockwiseContourIntegral;"         form="prefix"  largeop="true" stretchy="true"  lspace="0em" rspace="0em"
"&ContourIntegral;"                  form="prefix"  largeop="true" stretchy="true"  lspace="0em" rspace="0em"
"&CounterClockwiseContourIntegral;"  form="prefix"  largeop="true" stretchy="true"  lspace="0em" rspace="0em"
"&DoubleContourIntegral;"            form="prefix"  largeop="true" stretchy="true"  lspace="0em" rspace="0em"
"&Integral;"                         form="prefix"  largeop="true" stretchy="true"  lspace="0em" rspace="0em"

"&Cup;"                              form="infix"    lspace=".16666em" rspace=".16666em"

"&Cap;"                              form="infix"    lspace=".16666em" rspace=".16666em"

"&VerticalTilde;"                    form="infix"    lspace=".16666em" rspace=".16666em"

"&Wedge;"                            form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"&CircleTimes;"                      form="prefix"  largeop="true" movablelimits="true"  lspace="0em" rspace=".16666em"
"&Coproduct;"                        form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"&Product;"                          form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"
"&Intersection;"                     form="prefix"  largeop="true" movablelimits="true" stretchy="true"  lspace="0em" rspace=".16666em"

"&Coproduct;"                        form="infix"    lspace=".16666em" rspace=".16666em"

"&Star;"                             form="infix"    lspace=".16666em" rspace=".16666em"

"&CircleDot;"                        form="prefix"  largeop="true" movablelimits="true"  lspace="0em" rspace=".16666em"

"*"                                  form="infix"    lspace=".16666em" rspace=".16666em"
"&InvisibleTimes;"                   form="infix"    lspace="0em" rspace="0em"

"&CenterDot;"                        form="infix"    lspace=".16666em" rspace=".16666em"

"&CircleTimes;"                      form="infix"    lspace=".16666em" rspace=".16666em"

"&Vee;"                              form="infix"    lspace=".16666em" rspace=".16666em"

"&Wedge;"                            form="infix"    lspace=".16666em" rspace=".16666em"

"&Diamond;"                          form="infix"    lspace=".16666em" rspace=".16666em"

"&Backslash;"                        form="infix"   stretchy="true"  lspace=".16666em" rspace=".16666em"

"/"                                  form="infix"   stretchy="true"  lspace=".16666em" rspace=".16666em"

"-"                                  form="prefix"   lspace="0em" rspace=".05555em"
"+"                                  form="prefix"   lspace="0em" rspace=".05555em"
"&MinusPlus;"                        form="prefix"   lspace="0em" rspace=".05555em"
"&PlusMinus;"                        form="prefix"   lspace="0em" rspace=".05555em"

"."                                  form="infix"    lspace="0em" rspace="0em"

"&Cross;"                            form="infix"    lspace=".11111em" rspace=".11111em"

"**"                                 form="infix"    lspace=".11111em" rspace=".11111em"

"&CircleDot;"                        form="infix"    lspace=".11111em" rspace=".11111em"

"&SmallCircle;"                      form="infix"    lspace=".11111em" rspace=".11111em"

"&Square;"                           form="prefix"   lspace="0em" rspace=".11111em"

"&Del;"                              form="prefix"   lspace="0em" rspace=".11111em"
"&PartialD;"                         form="prefix"   lspace="0em" rspace=".11111em"

"&CapitalDifferentialD;"             form="prefix"   lspace="0em" rspace=".11111em"
"&DifferentialD;"                    form="prefix"   lspace="0em" rspace=".11111em"

"&Sqrt;"                             form="prefix"  stretchy="true"  lspace="0em" rspace=".11111em"

"&DoubleDownArrow;"                  form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DoubleLongLeftArrow;"              form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DoubleLongLeftRightArrow;"         form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DoubleLongRightArrow;"             form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DoubleUpArrow;"                    form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DoubleUpDownArrow;"                form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DownArrow;"                        form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DownArrowBar;"                     form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DownArrowUpArrow;"                 form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&DownTeeArrow;"                     form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftDownTeeVector;"                form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftDownVector;"                   form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftDownVectorBar;"                form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftUpDownVector;"                 form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftUpTeeVector;"                  form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftUpVector;"                     form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LeftUpVectorBar;"                  form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LongLeftArrow;"                    form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LongLeftRightArrow;"               form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&LongRightArrow;"                   form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&ReverseUpEquilibrium;"             form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightDownTeeVector;"               form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightDownVector;"                  form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightDownVectorBar;"               form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightUpDownVector;"                form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightUpTeeVector;"                 form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightUpVector;"                    form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&RightUpVectorBar;"                 form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&ShortDownArrow;"                   form="infix"    lspace=".11111em" rspace=".11111em"
"&ShortUpArrow;"                     form="infix"    lspace=".11111em" rspace=".11111em"
"&UpArrow;"                          form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&UpArrowBar;"                       form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&UpArrowDownArrow;"                 form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&UpDownArrow;"                      form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&UpEquilibrium;"                    form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"
"&UpTeeArrow;"                       form="infix"   stretchy="true"  lspace=".11111em" rspace=".11111em"

"^"                                  form="infix"    lspace=".11111em" rspace=".11111em"

"&lt;&gt;"                              form="infix"    lspace=".11111em" rspace=".11111em"

"'"                                  form="postfix"  lspace=".11111em" rspace="0em"

"!"                                  form="postfix"  lspace=".11111em" rspace="0em"
"!!"                                 form="postfix"  lspace=".11111em" rspace="0em"

"~"                                  form="infix"    lspace=".11111em" rspace=".11111em"

"@"                                  form="infix"    lspace=".11111em" rspace=".11111em"

"--"                                 form="postfix"  lspace=".11111em" rspace="0em"
"--"                                 form="prefix"   lspace="0em" rspace=".11111em"
"++"                                 form="postfix"  lspace=".11111em" rspace="0em"
"++"                                 form="prefix"   lspace="0em" rspace=".11111em"

"&ApplyFunction;"                    form="infix"    lspace="0em" rspace="0em"

"?"                                  form="infix"    lspace=".11111em" rspace=".11111em"

"_"                                  form="infix"    lspace=".11111em" rspace=".11111em"

"&Breve;"                            form="postfix" accent="true"  lspace="0em" rspace="0em"
"&Cedilla;"                          form="postfix" accent="true"  lspace="0em" rspace="0em"
"&DiacriticalGrave;"                 form="postfix" accent="true"  lspace="0em" rspace="0em"
"&DiacriticalDot;"                   form="postfix" accent="true"  lspace="0em" rspace="0em"
"&DiacriticalDoubleAcute;"           form="postfix" accent="true"  lspace="0em" rspace="0em"
"&DiacriticalLeftArrow;"             form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DiacriticalLeftRightArrow;"        form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DiacriticalLeftRightVector;"       form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DiacriticalLeftVector;"            form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DiacriticalAcute;"                 form="postfix" accent="true"  lspace="0em" rspace="0em"
"&DiacriticalRightArrow;"            form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DiacriticalRightVector;"           form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DiacriticalTilde;"                 form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&DoubleDot;"                        form="postfix" accent="true"  lspace="0em" rspace="0em"
"&DownBreve;"                        form="postfix" accent="true"  lspace="0em" rspace="0em"
"&Hacek;"                            form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&Hat;"                              form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&OverBar;"                          form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&OverBrace;"                        form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&OverBracket;"                      form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&OverParenthesis;"                  form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&TripleDot;"                        form="postfix" accent="true"  lspace="0em" rspace="0em"
"&UnderBar;"                         form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&UnderBrace;"                       form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&UnderBracket;"                     form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
"&UnderParenthesis;"                 form="postfix" accent="true" stretchy="true"  lspace="0em" rspace="0em"
MathMLOperatorDictionary

# Build the %escapes lookup table: every single-byte character (code points
# 0..255) maps to its code point written as four uppercase hex digits,
# e.g. "A" => "0041".  The operator loop uses this table to turn literal
# (non-entity) operator characters into hex code points.
for (0..255) {
  # "C" is the unsigned-char pack template, valid for the whole 0..255 range.
  # The signed "c" template only covers -128..127, so the upper half relied
  # on wrap-around (and triggers a warning when warnings are enabled).
  $escapes{pack("C",$_)} = sprintf("%04X", $_);
}


# The bit flags that will be used.
#
# Each value is a string holding a C/C++ expression, not a number: the
# strings are joined with '|' and pasted verbatim into the generated
# MATHML_OPERATOR(...) entries.  The form uses the two low bits (values
# 1..3); every other property is a single bit from bit 2 upwards.

$NS_MATHML_OPERATOR_FORM_INFIX    = '1';
$NS_MATHML_OPERATOR_FORM_PREFIX   = '2';
$NS_MATHML_OPERATOR_FORM_POSTFIX  = '3';
$NS_MATHML_OPERATOR_STRETCHY      = '(1<<2)';
$NS_MATHML_OPERATOR_FENCE         = '(1<<3)';
$NS_MATHML_OPERATOR_ACCENT        = '(1<<4)';
$NS_MATHML_OPERATOR_LARGEOP       = '(1<<5)';
$NS_MATHML_OPERATOR_SEPARATOR     = '(1<<6)';
$NS_MATHML_OPERATOR_MOVABLELIMITS = '(1<<7)';

# Attribute names recognised in the dictionary.  Index 0 ("form") and the
# next $NUMFLAGS-1 entries are the ones that map to bit flags; the trailing
# lspace/rspace entries carry lengths, not flags.
@ATTRIBUTE = ("form",
              "stretchy",
              "fence",
              "accent",
              "largeop",
              "separator",
              "movablelimits",
              "lspace",
              "rspace");

# Number of leading @ATTRIBUTE entries (form included) that are flags.
$NUMFLAGS  = 7;

# Maps a "name=value" attribute string to its flag expression.  Attribute
# strings that are absent from this table (e.g. "stretchy=false") contribute
# no flag.  The "" and "0" keys map to '0', which Perl treats as false, so
# they contribute no flag either.
%FLAGS     = ("",                   '0',
              "0",                  '0',
              "form=infix",         $NS_MATHML_OPERATOR_FORM_INFIX,
              "form=prefix",        $NS_MATHML_OPERATOR_FORM_PREFIX,
              "form=postfix",       $NS_MATHML_OPERATOR_FORM_POSTFIX,
              "stretchy=true",      $NS_MATHML_OPERATOR_STRETCHY,
              "fence=true",         $NS_MATHML_OPERATOR_FENCE,
              "accent=true",        $NS_MATHML_OPERATOR_ACCENT,
              "largeop=true",       $NS_MATHML_OPERATOR_LARGEOP,
              "separator=true",     $NS_MATHML_OPERATOR_SEPARATOR,
              # Must be spelled exactly like the definition above: a
              # misspelled name is an undefined variable, which silently
              # drops the movablelimits flag from every entry.
              "movablelimits=true", $NS_MATHML_OPERATOR_MOVABLELIMITS);

  # build the macro lists
  #
  # Walks the dictionary text held in $DATA one line at a time and
  # accumulates three strings:
  #   $MACRO_LIST_UNICHAR - per operator, a comma-separated list of 0x....
  #                         code points terminated by 0x0000
  #   $MACRO_LIST_UNIDATA - MATHML_OPERATOR(...) entries whose text is the
  #                         operator written as \x.... escapes
  #   $MACRO_LIST_ENTDATA - the same entries whose text is the operator as
  #                         written in the dictionary (entity names)
  # Operators that reference an entity with no known code point are reported
  # and left out of all three lists.

  print "\n\nBuilding the operator list...\n";

  $MACRO_LIST_UNICHAR = $MACRO_LIST_UNIDATA = $MACRO_LIST_ENTDATA = "";

  $count = 0;                      # index of the next emitted operator
  @OPERATOR = split("\n",$DATA);

  $group = 0;                      # incremented at every blank line
  for ($rank=0; $rank<=$#OPERATOR; ++$rank) {
    $data = $OPERATOR[$rank];
    $data =~ s#^\s+##;
    if ($data eq "") {
       ++$group;
       next;
    }

    # Forget everything recorded for the previous operator.  %VALUE (not
    # %VALUES) is the hash filled below; clearing it prevents attributes that
    # have no default - such as "form" - from leaking into the next line.
    delete @KEY{keys %KEY};
    delete @VALUE{keys %VALUE};

    # Initialize with default values set by the REC :
    $VALUE{'fence'} = 'false';
    $VALUE{'separator'} = 'false';
    $VALUE{'lspace'} = '.27777em';
    $VALUE{'rspace'} = '.27777em';
    $VALUE{'stretchy'} = 'false';
    # $VALUE{'symmetric'} = 'true';
    # $VALUE{'maxsize'} = 'infinity';
    # $VALUE{'minsize'} = '1';
    $VALUE{'largeop'} = 'false';
    $VALUE{'movablelimits'} = 'false';
    $VALUE{'accent'} = 'false';

    # First token is the (quoted) operator, the rest is name="value" pairs.
    # %VALUE keeps the bare value, %KEY keeps "name=value" for %FLAGS lookups.
    ($operator,$attributes) = ($1,$2) if $data =~ /(\S+)\s*(.*)\s*$/;
    while ($attributes =~ /(\S+)=\"([^"]*)\"/g) {
      ($name,$value) = ($1,$2);
      $VALUE{$name} = $value;
      $KEY{$name} = $name . '=' . $value;
    }

    # Turn the em lengths into C float literals: ".16666em" -> ".16666f",
    # "0em" -> "0.0f".
    $lspace = $VALUE{"lspace"}; $lspace =~ s/em//;
    $rspace = $VALUE{"rspace"}; $rspace =~ s/em//;
    $lspace .= ($lspace =~ m#\.#)? "f" : ".0f";
    $rspace .= ($rspace =~ m#\.#)? "f" : ".0f";

    # put all the flags together in an OR-list
    # (parenthesised only when something was OR-ed onto the form)
    $enclose = 0;
    $flags = $FLAGS{$KEY{"form"}};
    for ($i=1; $i<$NUMFLAGS; ++$i) {
      $key = $KEY{$ATTRIBUTE[$i]};
      next if !$FLAGS{$key};
      $flags .= '|' . $FLAGS{$key};
      ++$enclose;
    }
    $flags = '(' . $flags . ')' if $enclose;

    $operator =~ s#[\"]##g;        # drop the surrounding quotes
    $string = $operator;
    $unichar = "";

    #go over each entity and find its unicode point - collate the results in unichar
    $unicodemissed = 0;
    $i = 0;
    $unistring = '';
    # At most 5 entities/characters are consumed per operator.
    while ($string ne "" && $i < 5) { # there could be &ent1;&ent2;   and Perl is greedy...
      $entity = ($string =~ m#^\&(.*?)\;#)? $1:""; # so here we use the non-greedy modifier '?'
      if ($entity ne "") {
        # Leading &entity; : look its code point up in %UNICODE (filled
        # elsewhere in this script - presumably by getUnicode; verify).
        $string =~ s#^\&${entity}\;##;
        $unicode = $UNICODE{$entity};
        $unicodemissed = 1 if $unicode eq "";
        $unichar .= '\x' . $unicode;
        $unistring .= '0x' . $unicode . ',';

        # Remember entities whose code point falls in E000..F8FF.  This is a
        # string comparison, so it assumes 4-digit uppercase hex values.
        $PUA{$unicode} = $entity if ($unicode ge "E000" && $unicode le "F8FF");
      }
      else {
        # Literal character: take the first one and convert it to 4-digit
        # hex through the %escapes table.
        $entity = $1 if $string =~ m#^(.)#;
        $string =~ s#\S##;
        $entity =~ s/([\x00-\xFF])/$escapes{$1}/g;
        $unichar .= '\x' . $entity;
        $unistring .= '0x' . $entity . ',';
      }

      ++$i;
    }

    #some operators do not have unicode points, skip them !
    if ($unicodemissed) {
      print "Missing unicode for $operator... removing it from the dictionary...\n";
      next;
    }

    #global outputs:

    #UNICHAR is zero-separated list of unicode points
    $unistring .= '0x0000';
    $MACRO_LIST_UNICHAR .= $unistring . ', // ' . "$group $operator $VALUE{'form'}\n";

    #UNIDATA is the table based on unicode points
    $MACRO_LIST_UNIDATA .= 'MATHML_OPERATOR(' . $count . ',"' . $unichar . '",';
    $MACRO_LIST_UNIDATA .= "$flags,$lspace,$rspace" . ') // ' . "$operator $VALUE{'form'}\n";


    #ENTDATA is the table based on &entity; names
    #little swap here  to get a listing based on the entity
    # (after the swap $unichar holds the entity text and $operator holds the
    #  code points separated by spaces, used only in the trailing comment)
    $string = $operator;
    $operator = $unichar; $operator =~ s#\\x# #g;
    $unichar = $string;
    $MACRO_LIST_ENTDATA .= 'MATHML_OPERATOR(' . $count . ',"' . $unichar . '",';
    $MACRO_LIST_ENTDATA .= "$flags,$lspace,$rspace" . ') // ' . "$operator $VALUE{'form'}\n";

    ++$count;
  }

#  $puacount = 0;
#  foreach $pua (keys %PUA) {
#    ++$puacount;
#    print "$puacount $pua $PUA{$pua}\n";
#  }
}
