// KsqlGrammar.g4 (17 KB)
  // Combined lexer/parser grammar for KSQL statements and expressions.
  grammar KsqlGrammar;

  // DELIMITER is declared as a token type here; no lexer rule in this file emits it.
  tokens { DELIMITER }

  // Numeric channel ids; the comment and whitespace lexer rules below use
  // the same numbers in their channel(...) commands.
  @lexer::members {
      public static final int COMMENTS = 2;
      public static final int WHITESPACE = 3;
      public static final int DIRECTIVES = 4;
  }
  // Entry rule: zero or more ';'-terminated statements up to end of input.
  statements
      : singleStatement* EOF
      ;
  // Entry rule accepting exactly one of: a regular statement, an ASSERT
  // statement, or a RUN SCRIPT command. EOF after it is optional.
  testStatement
      : ( singleStatement
        | assertStatement ';'
        | runScript ';'
        ) EOF?
      ;
  // A statement followed by its terminating semicolon.
  singleStatement
      : statement ';'
      ;
  // Entry rule: a lone expression that must span the whole input.
  singleExpression
      : expression EOF
      ;
  // One KSQL statement without its trailing ';'.
  // Every alternative carries a label; #terminateQuery is shared by the
  // "TERMINATE <id>" and "TERMINATE ALL" forms.
  statement
      // --- queries and listings ---
      : query                                              #queryStatement
      | (LIST | SHOW) PROPERTIES                           #listProperties
      | (LIST | SHOW) ALL? TOPICS EXTENDED?                #listTopics
      | (LIST | SHOW) STREAMS EXTENDED?                    #listStreams
      | (LIST | SHOW) TABLES EXTENDED?                     #listTables
      | (LIST | SHOW) FUNCTIONS                            #listFunctions
      | (LIST | SHOW) (SOURCE | SINK)? CONNECTORS          #listConnectors
      | (LIST | SHOW) CONNECTOR PLUGINS                    #listConnectorPlugins
      | (LIST | SHOW) TYPES                                #listTypes
      | (LIST | SHOW) VARIABLES                            #listVariables
      // --- describe / print ---
      | DESCRIBE sourceName EXTENDED?                      #showColumns
      | DESCRIBE STREAMS EXTENDED?                         #describeStreams
      | DESCRIBE FUNCTION identifier                       #describeFunction
      | DESCRIBE CONNECTOR identifier                      #describeConnector
      | PRINT (identifier | STRING) printClause            #printTopic
      | (LIST | SHOW) QUERIES EXTENDED?                    #listQueries
      // --- query termination, properties and variables ---
      | TERMINATE identifier                               #terminateQuery
      | TERMINATE ALL                                      #terminateQuery
      | SET STRING EQ STRING                               #setProperty
      | UNSET STRING                                       #unsetProperty
      | DEFINE variableName EQ variableValue               #defineVariable
      | UNDEFINE variableName                              #undefineVariable
      // --- CREATE STREAM / TABLE / CONNECTOR ---
      | CREATE (OR REPLACE)? SOURCE? STREAM (IF NOT EXISTS)? sourceName
            tableElements?
            (WITH tableProperties)?                        #createStream
      | CREATE (OR REPLACE)? STREAM (IF NOT EXISTS)? sourceName
            (WITH tableProperties)? AS query               #createStreamAs
      | CREATE (OR REPLACE)? SOURCE? TABLE (IF NOT EXISTS)? sourceName
            tableElements?
            (WITH tableProperties)?                        #createTable
      | CREATE (OR REPLACE)? TABLE (IF NOT EXISTS)? sourceName
            (WITH tableProperties)? AS query               #createTableAs
      | CREATE (SINK | SOURCE) CONNECTOR (IF NOT EXISTS)? identifier
            WITH tableProperties                           #createConnector
      // --- INSERT / DROP / EXPLAIN / types / ALTER ---
      | INSERT INTO sourceName (WITH tableProperties)? query       #insertInto
      | INSERT INTO sourceName columns? VALUES values              #insertValues
      | DROP STREAM (IF EXISTS)? sourceName (DELETE TOPIC)?        #dropStream
      | DROP TABLE (IF EXISTS)? sourceName (DELETE TOPIC)?         #dropTable
      | DROP CONNECTOR (IF EXISTS)? identifier                     #dropConnector
      | EXPLAIN (statement | identifier)                           #explain
      | CREATE TYPE (IF NOT EXISTS)? identifier AS type            #registerType
      | DROP TYPE (IF EXISTS)? identifier                          #dropType
      | ALTER (STREAM | TABLE) sourceName alterOption (',' alterOption)*  #alterSource
      ;
  // ASSERT forms reachable from testStatement: row values, a NULL-values row
  // identified by KEY, or the shape of a stream/table.
  assertStatement
      : ASSERT VALUES sourceName columns? VALUES values                #assertValues
      | ASSERT NULL VALUES sourceName columns? KEY values              #assertTombstone
      | ASSERT STREAM sourceName tableElements? (WITH tableProperties)?  #assertStream
      | ASSERT TABLE sourceName tableElements? (WITH tableProperties)?   #assertTable
      ;
  // RUN SCRIPT followed by a single-quoted string literal.
  runScript
      : RUN SCRIPT STRING
      ;
  // SELECT query. The optional clauses must appear in the order listed:
  // WINDOW, WHERE, GROUP BY, PARTITION BY, HAVING, EMIT, LIMIT.
  query
      : SELECT selectItem (',' selectItem)*
        FROM from=relation
        ( WINDOW windowExpression )?
        ( WHERE where=booleanExpression )?
        ( GROUP BY groupBy )?
        ( PARTITION BY partitionBy )?
        ( HAVING having=booleanExpression )?
        ( EMIT resultMaterialization )?
        limitClause?
      ;
  // Keyword that follows EMIT in a query: CHANGES or FINAL.
  resultMaterialization
      : CHANGES | FINAL
      ;
  // Single ALTER option: ADD [COLUMN] <name> <type>.
  alterOption
      : ADD COLUMN? identifier type
      ;
  // Parenthesized, comma-separated list of one or more column definitions.
  tableElements
      : '('
            tableElement ( ',' tableElement )*
        ')'
      ;
  // One column definition: name, type, and an optional constraint.
  tableElement
      : identifier type columnConstraints?
      ;
  // Column constraint: [PRIMARY] KEY, HEADERS, or HEADER('<string>').
  columnConstraints
      : PRIMARY? KEY
      | HEADERS
      | HEADER '(' STRING ')'
      ;
  // Parenthesized, comma-separated list of one or more key = value properties.
  tableProperties
      : '('
            tableProperty ( ',' tableProperty )*
        ')'
      ;
  // A single property: the key is an identifier or a string, the value a literal.
  tableProperty
      : ( identifier | STRING ) EQ literal
      ;
  // Trailing options of PRINT; all three parts are optional, so this rule
  // can match the empty input.
  printClause
      : ( FROM BEGINNING )?
        intervalClause?
        limitClause?
      ;
  // INTERVAL <number> or SAMPLE <number>.
  intervalClause
      : ( INTERVAL | SAMPLE ) number
      ;
  // LIMIT <number>.
  limitClause
      : LIMIT number
      ;
  // RETENTION <number> <unit>, used inside window definitions.
  retentionClause
      : RETENTION number windowUnit
      ;
  // GRACE PERIOD <number> <unit>, used by window definitions and join windows.
  gracePeriodClause
      : GRACE PERIOD number windowUnit
      ;
  // Body of a WINDOW clause: an optional leading IDENTIFIER token followed by
  // exactly one of the three window kinds.
  windowExpression
      : IDENTIFIER?
        ( tumblingWindowExpression
        | hoppingWindowExpression
        | sessionWindowExpression
        )
      ;
  // TUMBLING(SIZE <n> <unit> [, RETENTION ...] [, GRACE PERIOD ...]).
  tumblingWindowExpression
      : TUMBLING '('
            SIZE number windowUnit
            ( ',' retentionClause )?
            ( ',' gracePeriodClause )?
        ')'
      ;
  // HOPPING(SIZE <n> <unit>, ADVANCE BY <n> <unit> [, RETENTION ...] [, GRACE PERIOD ...]).
  hoppingWindowExpression
      : HOPPING '('
            SIZE number windowUnit ','
            ADVANCE BY number windowUnit
            ( ',' retentionClause )?
            ( ',' gracePeriodClause )?
        ')'
      ;
  // SESSION(<n> <unit> [, RETENTION ...] [, GRACE PERIOD ...]).
  sessionWindowExpression
      : SESSION '('
            number windowUnit
            ( ',' retentionClause )?
            ( ',' gracePeriodClause )?
        ')'
      ;
  // Time-unit keyword; singular forms are listed first, then the plural forms.
  windowUnit
      : DAY  | HOUR  | MINUTE  | SECOND  | MILLISECOND
      | DAYS | HOURS | MINUTES | SECONDS | MILLISECONDS
      ;
  // GROUP BY operand: a bare comma-separated list of one or more value
  // expressions, or a parenthesized list that may be empty.
  groupBy
      : valueExpression ( ',' valueExpression )*
      | '(' ( valueExpression ( ',' valueExpression )* )? ')'
      ;
  // PARTITION BY operand: a bare comma-separated list of one or more value
  // expressions, or a parenthesized list that may be empty.
  partitionBy
      : valueExpression ( ',' valueExpression )*
      | '(' ( valueExpression ( ',' valueExpression )* )? ')'
      ;
  // Parenthesized, comma-separated list of value expressions; may be empty.
  values
      : '('
            ( valueExpression ( ',' valueExpression )* )?
        ')'
      ;
  // One projection item: an expression with an optional alias, "<name>.*",
  // or a bare "*". Both star forms share the #selectAll label.
  selectItem
      : expression ( AS? identifier )?    #selectSingle
      | identifier '.' ASTERISK           #selectAll
      | ASTERISK                          #selectAll
      ;
  // FROM operand: a source joined with one or more further sources, or a
  // single aliased source on its own.
  relation
      : left=aliasedRelation joinedSource+    #joinRelation
      | aliasedRelation                       #relationDefault
      ;
  // Right-hand side of one join: join type, JOIN keyword, the joined source,
  // an optional WITHIN window and the mandatory ON criteria.
  joinedSource
      : joinType JOIN aliasedRelation
        joinWindow?
        joinCriteria
      ;
  // Join flavour. The first alternative also matches the empty input, so a
  // bare JOIN is labelled #innerJoin.
  joinType
      : INNER?         #innerJoin
      | FULL OUTER?    #outerJoin
      | LEFT OUTER?    #leftJoin
      ;
  // WITHIN clause of a join.
  joinWindow
      : WITHIN withinExpression
      ;
  // Operand of WITHIN: a parenthesized pair of window sizes or a single
  // window size, each optionally followed by a grace period.
  withinExpression
      : '(' joinWindowSize ',' joinWindowSize ')' gracePeriodClause?    #joinWindowWithBeforeAndAfter
      | joinWindowSize gracePeriodClause?                               #singleJoinWindow
      ;
  // A join window size: <number> <unit>.
  joinWindowSize
      : number windowUnit
      ;
  // Join condition: ON <boolean expression>.
  joinCriteria
      : ON booleanExpression
      ;
  // A source reference with an optional alias; the AS keyword may be omitted.
  aliasedRelation
      : relationPrimary ( AS? sourceName )?
      ;
  // Parenthesized, comma-separated list of one or more column names.
  columns
      : '('
            identifier ( ',' identifier )*
        ')'
      ;
  // Base relation; the only alternative is a source name.
  relationPrimary
      : sourceName    #tableName
      ;
  // General expression; delegates directly to booleanExpression.
  expression
      : booleanExpression
      ;
  // Boolean expression (left-recursive). Alternative order is significant:
  // NOT comes before AND, and AND before OR. Both binary forms share the
  // #logicalBinary label.
  booleanExpression
      : predicated                                                      #booleanDefault
      | NOT booleanExpression                                           #logicalNot
      | left=booleanExpression operator=AND right=booleanExpression     #logicalBinary
      | left=booleanExpression operator=OR right=booleanExpression      #logicalBinary
      ;
  // A value expression optionally followed by a predicate. The context of
  // the value expression is passed to the predicate rule as its argument.
  predicated
      : valueExpression
        predicate[$valueExpression.ctx]?
      ;
  // Predicate tail applied to a preceding value expression, which the caller
  // supplies as the `value` rule argument.
  predicate[ParserRuleContext value]
      : comparisonOperator right=valueExpression                          #comparison
      | NOT? BETWEEN lower=valueExpression AND upper=valueExpression      #between
      | NOT? IN '(' expression ( ',' expression )* ')'                    #inList
      | NOT? LIKE pattern=valueExpression ( ESCAPE escape=STRING )?       #like
      | IS NOT? NULL                                                      #nullPredicate
      | IS NOT? DISTINCT FROM right=valueExpression                       #distinctFrom
      ;
  // Arithmetic / value expression (left-recursive). Alternative order is
  // significant and is kept as-is: AT TIME ZONE, unary sign, * / %, + -, ||.
  valueExpression
      : primaryExpression                                                                   #valueExpressionDefault
      | valueExpression AT timeZoneSpecifier                                                #atTimeZone
      | operator=( MINUS | PLUS ) valueExpression                                           #arithmeticUnary
      | left=valueExpression operator=( ASTERISK | SLASH | PERCENT ) right=valueExpression  #arithmeticBinary
      | left=valueExpression operator=( PLUS | MINUS ) right=valueExpression                #arithmeticBinary
      | left=valueExpression CONCAT right=valueExpression                                   #concatenation
      ;
  // Atomic and postfix expressions: literals, constructors, CASE, CAST,
  // function calls, subscripts, column references, struct dereference and
  // parenthesized expressions. The rule is left-recursive through the
  // #subscript and #dereference alternatives. Both function-call forms
  // share the #functionCall label.
  primaryExpression
      : literal                                                              #literalExpression
      | identifier STRING                                                    #typeConstructor
      | CASE valueExpression whenClause+ ( ELSE elseExpression=expression )? END   #simpleCase
      | CASE whenClause+ ( ELSE elseExpression=expression )? END             #searchedCase
      | CAST '(' expression AS type ')'                                      #cast
      | ARRAY '[' ( expression ( ',' expression )* )? ']'                    #arrayConstructor
      | MAP '(' ( expression ASSIGN expression ( ',' expression ASSIGN expression )* )? ')'       #mapConstructor
      | STRUCT '(' ( identifier ASSIGN expression ( ',' identifier ASSIGN expression )* )? ')'    #structConstructor
      | identifier '(' ASTERISK ')'                                          #functionCall
      | identifier '(' ( functionArgument ( ',' functionArgument )* ( ',' lambdaFunction )* )? ')'   #functionCall
      | value=primaryExpression '[' index=valueExpression ']'                #subscript
      | identifier                                                           #columnReference
      | identifier '.' identifier                                            #qualifiedColumnReference
      | base=primaryExpression STRUCT_FIELD_REF fieldName=identifier         #dereference
      | '(' expression ')'                                                   #parenthesizedExpression
      ;
  // Function-call argument: an expression or a bare time-unit keyword.
  functionArgument
      : expression | windowUnit
      ;
  // Operand of AT: TIME ZONE '<string>'.
  timeZoneSpecifier
      : TIME ZONE STRING    #timeZoneString
      ;
  // The six comparison operator tokens.
  comparisonOperator
      : EQ
      | NEQ
      | LT
      | LTE
      | GT
      | GTE
      ;
  // Boolean keyword literal.
  booleanValue
      : TRUE
      | FALSE
      ;
  // Type expression (left-recursive via the postfix "<type> ARRAY" form).
  // Alternative order is kept as-is.
  type
      : type ARRAY
      | ARRAY '<' type '>'
      | MAP '<' type ',' type '>'
      | STRUCT '<' ( identifier type ( ',' identifier type )* )? '>'
      | DECIMAL '(' number ',' number ')'
      | baseType ( '(' typeParameter ( ',' typeParameter )* ')' )?
      ;
  // Parameter of a base type: an integer token or the literal text 'STRING'.
  typeParameter
      : INTEGER_VALUE
      | 'STRING'
      ;
  // A base type is named by any identifier.
  baseType
      : identifier
      ;
  // One WHEN <condition> THEN <result> arm of a CASE expression.
  whenClause
      : WHEN condition=expression
        THEN result=expression
      ;
  // Any identifier form. IDENTIFIER tokens and nonReserved keywords share
  // the #unquotedIdentifier label.
  identifier
      : VARIABLE                 #variableIdentifier
      | IDENTIFIER               #unquotedIdentifier
      | QUOTED_IDENTIFIER        #quotedIdentifierAlternative
      | nonReserved              #unquotedIdentifier
      | BACKQUOTED_IDENTIFIER    #backQuotedIdentifier
      | DIGIT_IDENTIFIER         #digitIdentifier
      ;
  // Lambda: a single parameter, or a parenthesized list of one or more
  // parameters, followed by '=>' and a body expression.
  lambdaFunction
      : identifier '=>' expression                                #lambda
      | '(' identifier ( ',' identifier )* ')' '=>' expression    #lambda
      ;
  // Name used by DEFINE / UNDEFINE; only a plain IDENTIFIER token is accepted.
  variableName
      : IDENTIFIER
      ;
  // Value used by DEFINE; a single-quoted string literal.
  variableValue
      : STRING
      ;
  // Name of a stream or table; any identifier form is accepted.
  sourceName
      : identifier
      ;
  // Numeric literal with an optional leading minus sign.
  number
      : MINUS? DECIMAL_VALUE           #decimalLiteral
      | MINUS? FLOATING_POINT_VALUE    #floatLiteral
      | MINUS? INTEGER_VALUE           #integerLiteral
      ;
  // Literal value: NULL, a number, a boolean, a string, or a ${variable}.
  literal
      : NULL            #nullLiteral
      | number          #numericLiteral
      | booleanValue    #booleanLiteral
      | STRING          #stringLiteral
      | VARIABLE        #variableLiteral
      ;
  // Keywords that the identifier rule also accepts as identifiers.
  nonReserved
      : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS | FUNCTION | SESSION
      | STRUCT | MAP | ARRAY | PARTITION
      | INTEGER | DATE | TIME | TIMESTAMP | INTERVAL | ZONE | 'STRING'
      | YEAR | MONTH | DAY | HOUR | MINUTE | SECOND
      | EXPLAIN | ANALYZE | TYPE | TYPES
      | SET | RESET
      | IF
      | SOURCE | SINK
      | PRIMARY | KEY
      | EMIT | CHANGES | FINAL
      | ESCAPE
      | REPLACE
      | ASSERT
      | ALTER | ADD
      ;
  // Keyword tokens. They are declared before IDENTIFIER, so on an
  // equal-length match the keyword rule wins. Declaration order is unchanged.
  EMIT         : 'EMIT';
  CHANGES      : 'CHANGES';
  FINAL        : 'FINAL';
  SELECT       : 'SELECT';
  FROM         : 'FROM';
  AS           : 'AS';
  ALL          : 'ALL';
  DISTINCT     : 'DISTINCT';
  WHERE        : 'WHERE';
  WITHIN       : 'WITHIN';
  WINDOW       : 'WINDOW';
  GROUP        : 'GROUP';
  BY           : 'BY';
  HAVING       : 'HAVING';
  LIMIT        : 'LIMIT';
  AT           : 'AT';
  OR           : 'OR';
  AND          : 'AND';
  IN           : 'IN';
  NOT          : 'NOT';
  EXISTS       : 'EXISTS';
  BETWEEN      : 'BETWEEN';
  LIKE         : 'LIKE';
  ESCAPE       : 'ESCAPE';
  IS           : 'IS';
  NULL         : 'NULL';
  TRUE         : 'TRUE';
  FALSE        : 'FALSE';
  INTEGER      : 'INTEGER';
  DATE         : 'DATE';
  TIME         : 'TIME';
  TIMESTAMP    : 'TIMESTAMP';
  INTERVAL     : 'INTERVAL';
  YEAR         : 'YEAR';
  MONTH        : 'MONTH';
  DAY          : 'DAY';
  HOUR         : 'HOUR';
  MINUTE       : 'MINUTE';
  SECOND       : 'SECOND';
  MILLISECOND  : 'MILLISECOND';
  YEARS        : 'YEARS';
  MONTHS       : 'MONTHS';
  DAYS         : 'DAYS';
  HOURS        : 'HOURS';
  MINUTES      : 'MINUTES';
  SECONDS      : 'SECONDS';
  MILLISECONDS : 'MILLISECONDS';
  ZONE         : 'ZONE';
  TUMBLING     : 'TUMBLING';
  HOPPING      : 'HOPPING';
  SIZE         : 'SIZE';
  ADVANCE      : 'ADVANCE';
  RETENTION    : 'RETENTION';
  GRACE        : 'GRACE';
  PERIOD       : 'PERIOD';
  CASE         : 'CASE';
  WHEN         : 'WHEN';
  THEN         : 'THEN';
  ELSE         : 'ELSE';
  END          : 'END';
  JOIN         : 'JOIN';
  FULL         : 'FULL';
  OUTER        : 'OUTER';
  INNER        : 'INNER';
  LEFT         : 'LEFT';
  RIGHT        : 'RIGHT';
  ON           : 'ON';
  PARTITION    : 'PARTITION';
  STRUCT       : 'STRUCT';
  WITH         : 'WITH';
  VALUES       : 'VALUES';
  CREATE       : 'CREATE';
  TABLE        : 'TABLE';
  TOPIC        : 'TOPIC';
  STREAM       : 'STREAM';
  STREAMS      : 'STREAMS';
  INSERT       : 'INSERT';
  DELETE       : 'DELETE';
  INTO         : 'INTO';
  DESCRIBE     : 'DESCRIBE';
  EXTENDED     : 'EXTENDED';
  PRINT        : 'PRINT';
  EXPLAIN      : 'EXPLAIN';
  ANALYZE      : 'ANALYZE';
  TYPE         : 'TYPE';
  TYPES        : 'TYPES';
  CAST         : 'CAST';
  SHOW         : 'SHOW';
  LIST         : 'LIST';
  TABLES       : 'TABLES';
  TOPICS       : 'TOPICS';
  QUERY        : 'QUERY';
  QUERIES      : 'QUERIES';
  TERMINATE    : 'TERMINATE';
  LOAD         : 'LOAD';
  COLUMNS      : 'COLUMNS';
  COLUMN       : 'COLUMN';
  PARTITIONS   : 'PARTITIONS';
  FUNCTIONS    : 'FUNCTIONS';
  FUNCTION     : 'FUNCTION';
  DROP         : 'DROP';
  TO           : 'TO';
  RENAME       : 'RENAME';
  ARRAY        : 'ARRAY';
  MAP          : 'MAP';
  SET          : 'SET';
  DEFINE       : 'DEFINE';
  UNDEFINE     : 'UNDEFINE';
  RESET        : 'RESET';
  SESSION      : 'SESSION';
  SAMPLE       : 'SAMPLE';
  EXPORT       : 'EXPORT';
  CATALOG      : 'CATALOG';
  PROPERTIES   : 'PROPERTIES';
  BEGINNING    : 'BEGINNING';
  UNSET        : 'UNSET';
  RUN          : 'RUN';
  SCRIPT       : 'SCRIPT';
  DECIMAL      : 'DECIMAL';
  KEY          : 'KEY';
  CONNECTOR    : 'CONNECTOR';
  CONNECTORS   : 'CONNECTORS';
  SINK         : 'SINK';
  SOURCE       : 'SOURCE';
  NAMESPACE    : 'NAMESPACE';
  MATERIALIZED : 'MATERIALIZED';
  VIEW         : 'VIEW';
  PRIMARY      : 'PRIMARY';
  REPLACE      : 'REPLACE';
  ASSERT       : 'ASSERT';
  ADD          : 'ADD';
  ALTER        : 'ALTER';
  VARIABLES    : 'VARIABLES';
  PLUGINS      : 'PLUGINS';
  HEADERS      : 'HEADERS';
  HEADER       : 'HEADER';
  IF           : 'IF';
  // Operator and punctuation tokens. NEQ accepts both '<>' and '!='.
  EQ                : '=';
  NEQ               : '<>' | '!=';
  LT                : '<';
  LTE               : '<=';
  GT                : '>';
  GTE               : '>=';
  PLUS              : '+';
  MINUS             : '-';
  ASTERISK          : '*';
  SLASH             : '/';
  PERCENT           : '%';
  CONCAT            : '||';
  ASSIGN            : ':=';
  STRUCT_FIELD_REF  : '->';
  LAMBDA_EXPRESSION : '=>';
  // Single-quoted string literal; a doubled '' is accepted inside the quotes.
  STRING
      : '\''
        ( ~'\'' | '\'\'' )*
        '\''
      ;
  // One or more digits.
  INTEGER_VALUE
      : DIGIT+
      ;

  // Digits with a decimal point; digits may be missing on either side of
  // the point, but not on both.
  DECIMAL_VALUE
      : DIGIT+ '.' DIGIT*
      | '.' DIGIT+
      ;

  // A number carrying an exponent, with or without a decimal point.
  FLOATING_POINT_VALUE
      : DIGIT+ ( '.' DIGIT* )? EXPONENT
      | '.' DIGIT+ EXPONENT
      ;
  // Starts with a letter or '_'; continues with letters, digits, '_' or '@'.
  IDENTIFIER
      : ( LETTER | '_' )
        ( LETTER | DIGIT | '_' | '@' )*
      ;

  // Starts with a digit and is followed by at least one identifier character.
  DIGIT_IDENTIFIER
      : DIGIT
        ( LETTER | DIGIT | '_' | '@' )+
      ;

  // Double-quoted identifier; a doubled "" is accepted inside the quotes.
  QUOTED_IDENTIFIER
      : '"'
        ( ~'"' | '""' )*
        '"'
      ;

  // Back-quoted identifier; a doubled `` is accepted inside the quotes.
  BACKQUOTED_IDENTIFIER
      : '`'
        ( ~'`' | '``' )*
        '`'
      ;
  // Variable reference of the form ${name}, where name is an IDENTIFIER.
  VARIABLE
      : '${'
        IDENTIFIER
        '}'
      ;
  // Exponent suffix: 'E', an optional sign, then one or more digits.
  fragment EXPONENT
      : 'E' [+-]? DIGIT+
      ;

  // A single decimal digit.
  fragment DIGIT
      : [0-9]
      ;

  // A single upper-case ASCII letter; lower-case letters are not matched here.
  fragment LETTER
      : [A-Z]
      ;
  // Single-line comment: '--' up to and including the line terminator, sent
  // to the comments channel.
  //
  // The comment text is optional and its first character may be neither '@'
  // nor a line terminator. Previously the rule required one non-'@'
  // character after '--'; that character could be '\n', so an empty comment
  // line ("--" followed directly by '\n') consumed the whole next line, and
  // a bare "--" at end of input was not recognized as a comment at all.
  // Text starting with "--@" still goes to DIRECTIVE_COMMENT, because that
  // rule yields the longer match.
  SIMPLE_COMMENT
      : '--' ( ~[@\r\n] ~[\r\n]* )? '\r'? '\n'? -> channel(2) // channel(COMMENTS)
      ;
  // Line starting with '--@', up to and including the line terminator;
  // sent to the directives channel.
  DIRECTIVE_COMMENT
      : '--@' ~[\r\n]*
        '\r'? '\n'?
        -> channel(2 + 2) // channel(DIRECTIVES)
      ;
  // Block comment: '/*' through the nearest following '*/' (non-greedy);
  // sent to the comments channel.
  BRACKETED_COMMENT
      : '/*'
        .*?
        '*/'
        -> channel(2) // channel(COMMENTS)
      ;
  // Run of spaces, tabs, carriage returns or newlines; sent to the
  // whitespace channel.
  WS
      : [ \t\r\n]+
        -> channel(3) // channel(WHITESPACE)
      ;
  // Fallback token: matches any single character that no other lexer rule
  // accepts. It lets all input text be ignored and recovered when
  // statements are split with DelimiterLexer.
  UNRECOGNIZED
      : .
      ;
  535. ;