JavaCC: a simple parser for search text with expression support

Version 1: terms separated by spaces

// Generator settings for the version 1 grammar.
options
{
  // Generate instance (non-static) parser members.
  static = false;
  // Have the generated parser read its input as Unicode characters.
  UNICODE_INPUT = true;
}

// Everything between PARSER_BEGIN and PARSER_END is the Java compilation unit
// for the parser class.
PARSER_BEGIN(SearchText)
package me.changchao.javacc.hello.search.string;

/**
 * Parser class for the version 1 search-text grammar.
 *
 * The class body is intentionally empty: it declares no members of its own,
 * and the only grammar production in this version is count().
 */
public class SearchText
{
}

PARSER_END(SearchText)

// Lexical tokens of version 1: the input is split into runs of spaces and
// runs of non-space characters.
TOKEN :
{
  // One or more consecutive space characters.
  < SEPARATOR : (" ")+ >
  // One or more consecutive characters that are not a space.
| < TERM : (~[ " " ])+ >
}

/**
 * Counts the search terms in the input.
 *
 * The input is consumed as any sequence of SEPARATOR and TERM tokens;
 * separators are skipped and every TERM increments the counter.
 *
 * @return the number of TERM tokens that were consumed
 */
int count() :
{
  // A plain counter is enough: the term text itself is never used here.
  int terms = 0;
}
{
  (
    < SEPARATOR >
  | < TERM >
    {
      terms++;
    }
  )*
  {
    return terms;
  }
}

Version 2: phrase retrieval

// Generator settings for the version 2 grammar.
options
{
  // Generate instance (non-static) parser members.
  static = false;
  // Have the generated parser read its input as Unicode characters.
  UNICODE_INPUT = true;
}

// Everything between PARSER_BEGIN and PARSER_END is the Java compilation unit
// for the parser class.
PARSER_BEGIN(SearchText)
package me.changchao.javacc.hello.search.string;

/**
 * Parser class for the version 2 search-text grammar (terms plus quoted
 * phrases).
 *
 * The class body is intentionally empty: it declares no members of its own,
 * and the only grammar production in this version is getTerms().
 */
public class SearchText
{
}

PARSER_END(SearchText)

// Tokens recognized outside of a quoted phrase.
TOKEN :
{
  // One or more consecutive space characters.
  < SEPARATOR : ([ " " ])+ >
  // One or more consecutive characters that are neither a space nor a
  // double quote.
| < TERM : (~[ " ", "\"" ])+ >
  // An opening double quote; matching it switches the lexer to PHRASE_STATE.
| < QUOTE : "\"" > : PHRASE_STATE
}

// Tokens recognized while inside a quoted phrase.
< PHRASE_STATE >
TOKEN :
{
  // The closing double quote (same pattern as QUOTE); matching it switches
  // the lexer back to the DEFAULT state.
  < ENDQUOTE : < QUOTE > > : DEFAULT
  // One or more characters other than a double quote. Spaces are included,
  // so the whole phrase is a single token.
| < PHRASE : (~[ "\"" ])+ >
}

/**
 * Collects the search terms found in the input, in order of appearance.
 *
 * A term is either a bare TERM token or the PHRASE found between a QUOTE and
 * an ENDQUOTE. SEPARATOR tokens between terms are skipped. A quoted phrase
 * must contain at least one character, because the PHRASE token is required
 * between the quotes.
 *
 * @return the text of every term and phrase, without the surrounding quotes
 */
java.util.Vector < String > getTerms() :
{
  java.util.Vector < String > terms = new java.util.Vector < String > ();
  Token word;
}
{
  (
    < SEPARATOR >
  |
    (
      word = < TERM >
    | < QUOTE > word = < PHRASE > < ENDQUOTE >
    )
    {
      // Both alternatives store only the token text.
      terms.add(word.image);
    }
  )*
  {
    return terms;
  }
}

Version 3: the final version

// Generator settings for the version 3 grammar.
options
{
  // Generate instance (non-static) parser members.
  static = false;
  // Have the generated parser read its input as Unicode characters.
  UNICODE_INPUT = true;
  // Global lookahead settings, left disabled: sum() and term() request
  // LOOKAHEAD(4) locally at the choice points that need it instead.
  //  LOOKAHEAD=4;
  //  FORCE_LA_CHECK=true;
}

// Everything between PARSER_BEGIN and PARSER_END is the Java compilation unit
// for the parser class.
PARSER_BEGIN(SearchExpression)
package me.changchao.javacc.hello.search.expression;

/**
 * Parser class for the search-expression grammar.
 *
 * The class body is intentionally empty: it declares no members of its own.
 * The grammar productions are getExpressionText() (the entry rule, which
 * requires EOF), sum(), term(), unary() and element().
 */
public class SearchExpression
{
}

PARSER_END(SearchExpression)

// Operator tokens. Each of these single characters would also match TERM,
// which is declared later in the file; the operators are listed first so
// that a stand-alone operator character is reported as the operator token.
// How the productions translate them:
//   PLUS        -> " OR "       (sum)
//   MINUS       -> " AND NOT "  (sum)
//   UNARY_MINUS -> "NOT"        (unary)
//   MULTIPLY    -> " AND "      (term)
TOKEN : /* OPERATORS */
{
  < PLUS : "+" >
| < MINUS : "-" >
| < UNARY_MINUS : "!" >
| < MULTIPLY : "*" >
}

// Grouping tokens; element() uses them to wrap a nested sum().
TOKEN :
{
  < LEFT_P : "(" >
| < RIGHT_P : ")" >
}

// Tokens recognized outside of a quoted phrase.
TOKEN :
{
  // One or more consecutive space characters.
  < SEPARATOR : ([ " " ])+ >
  // One or more consecutive characters that are not a space, a double quote
  // or a parenthesis. Operator characters are not excluded, so text such as
  // "a+b" is a single TERM.
| < TERM : (~[ " ", "\"", "(", ")" ])+ >
  // An opening double quote; matching it switches the lexer to PHRASE_STATE.
| < QUOTE : "\"" > : PHRASE_STATE
}

// Tokens recognized while inside a quoted phrase.
< PHRASE_STATE >
TOKEN :
{
  // The closing double quote (same pattern as QUOTE); matching it switches
  // the lexer back to the DEFAULT state.
  < ENDQUOTE : < QUOTE > > : DEFAULT
  // One or more characters other than a double quote. Spaces, parentheses
  // and operator characters are all part of the phrase.
| < PHRASE : (~[ "\"" ])+ >
}

/**
 * Entry rule: parses a complete search expression and returns its
 * translated text.
 *
 * Leading and trailing SEPARATOR tokens are skipped, and the expression must
 * be followed by EOF.
 *
 * @return the string produced by sum() for the whole input
 */
String getExpressionText() :
{
  String expression;
}
{
  ( < SEPARATOR > )*
  expression = sum()
  ( < SEPARATOR > )*
  < EOF >
  {
    return expression;
  }
}

/**
 * Parses a chain of term() operands joined by " + " or " - ".
 *
 * PLUS is rendered as " OR " and MINUS as " AND NOT ". The chain is folded
 * from the left: each step wraps the text built so far together with the new
 * operand in one pair of parentheses. The final result is wrapped once more.
 * An operator is only recognized with a SEPARATOR on both sides.
 *
 * @return the parenthesized translation of the chain
 */
String sum() :
{
  String expr;
  String joiner;
  String operand;
}
{
  expr = term()
  (
    // Look past the SEPARATOR before committing: a SEPARATOR can also be
    // trailing whitespace in front of ")" or EOF.
    LOOKAHEAD(4)
    < SEPARATOR >
    (
      < PLUS >
      {
        joiner = " OR ";
      }
    | < MINUS >
      {
        joiner = " AND NOT ";
      }
    )
    < SEPARATOR >
    operand = term()
    {
      expr = "(" + expr + joiner + operand + ")";
    }
  )*
  {
    return "(" + expr + ")";
  }
}

/**
 * Parses a chain of unary() operands joined by " * ".
 *
 * MULTIPLY is rendered as " AND ". The chain is folded from the left: each
 * step wraps the text built so far together with the new operand in one pair
 * of parentheses. The final result is wrapped once more. The operator is
 * only recognized with a SEPARATOR on both sides.
 *
 * @return the parenthesized translation of the chain
 */
String term() :
{
  String expr;
  String factor;
}
{
  expr = unary()
  (
    // Look past the SEPARATOR before committing: the SEPARATOR may instead
    // introduce a "+" / "-" handled by sum(), or be trailing whitespace.
    LOOKAHEAD(4)
    < SEPARATOR > < MULTIPLY > < SEPARATOR > factor = unary()
    {
      expr = "(" + expr + " AND " + factor + ")";
    }
  )*
  {
    return "(" + expr + ")";
  }
}

/**
 * Parses an element() preceded by zero or more "! " negation prefixes.
 *
 * Each prefix is an UNARY_MINUS token followed by a SEPARATOR. Only the
 * prefix repeats; exactly one operand follows the whole prefix run. (The
 * previous version repeated the operand as well, so "! ! a" was a parse
 * error and a repeated prefix silently discarded the earlier operand.)
 *
 * An odd number of negations yields "(NOT <operand> )". An even number,
 * including none, cancels out and yields "(<operand>)".
 *
 * @return the translated operand, negated or not
 */
String unary() :
{
  int negations = 0;
  String operand;
}
{
  (
    < UNARY_MINUS > < SEPARATOR >
    {
      negations++;
    }
  )*
  operand = element()
  {
    return negations % 2 == 0 ? ("(" + operand + ")") : ("(NOT " + operand + " )");
  }
}

/**
 * Parses the smallest unit of an expression.
 *
 * Three forms are accepted:
 *   - a bare TERM, returned wrapped in double quotes;
 *   - a quoted phrase (QUOTE PHRASE ENDQUOTE), returned wrapped in double
 *     quotes; the phrase must contain at least one character;
 *   - a parenthesized sum(), with optional SEPARATOR tokens just inside the
 *     parentheses, returned exactly as sum() produced it.
 *
 * @return the translated text of the element
 */
String element() :
{
  String inner;
  Token word;
}
{
  (
    word = < TERM >
  | < QUOTE > word = < PHRASE > < ENDQUOTE >
  )
  {
    // Terms and phrases are both emitted as a double-quoted literal.
    return "\"" + word.image + "\"";
  }
| < LEFT_P >
  ( < SEPARATOR > )*
  inner = sum()
  ( < SEPARATOR > )*
  < RIGHT_P >
  {
    return inner;
  }
}