read.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package modfile
  5. import (
  6. "bytes"
  7. "errors"
  8. "fmt"
  9. "os"
  10. "strconv"
  11. "strings"
  12. "unicode"
  13. "unicode/utf8"
  14. )
  15. // A Position describes an arbitrary source position in a file, including the
  16. // file, line, column, and byte offset.
  17. type Position struct {
  18. Line int // line in input (starting at 1)
  19. LineRune int // rune in line (starting at 1)
  20. Byte int // byte in input (starting at 0)
  21. }
  22. // add returns the position at the end of s, assuming it starts at p.
  23. func (p Position) add(s string) Position {
  24. p.Byte += len(s)
  25. if n := strings.Count(s, "\n"); n > 0 {
  26. p.Line += n
  27. s = s[strings.LastIndex(s, "\n")+1:]
  28. p.LineRune = 1
  29. }
  30. p.LineRune += utf8.RuneCountInString(s)
  31. return p
  32. }
// An Expr represents an input element: a line, a line block, a paren,
// or a block of comments.
type Expr interface {
	// Span returns the start and end position of the expression,
	// excluding leading or trailing comments.
	Span() (start, end Position)

	// Comment returns the comments attached to the expression.
	// This method would normally be named 'Comments' but that
	// would interfere with embedding a type of the same name.
	Comment() *Comments
}
// A Comment represents a single // comment.
type Comment struct {
	Start  Position
	Token  string // without trailing newline
	Suffix bool   // an end of line (not whole line) comment
}

// Comments collects the comments associated with an expression.
type Comments struct {
	Before []Comment // whole-line comments before this expression
	Suffix []Comment // end-of-line comments after this expression

	// For top-level expressions only, After lists whole-line
	// comments following the expression.
	After []Comment
}
  57. // Comment returns the receiver. This isn't useful by itself, but
  58. // a Comments struct is embedded into all the expression
  59. // implementation types, and this gives each of those a Comment
  60. // method to satisfy the Expr interface.
  61. func (c *Comments) Comment() *Comments {
  62. return c
  63. }
// A FileSyntax represents an entire go.mod file.
type FileSyntax struct {
	Name string // file path
	Comments
	Stmt []Expr
}

// Span returns the position of the first statement's start and the last
// statement's end; for an empty file both positions are the zero Position.
func (x *FileSyntax) Span() (start, end Position) {
	if len(x.Stmt) == 0 {
		return
	}
	start, _ = x.Stmt[0].Span()
	_, end = x.Stmt[len(x.Stmt)-1].Span()
	return start, end
}
// addLine adds a line containing the given tokens to the file.
//
// If the first token of the hint matches the first token of the
// line, the new line is added at the end of the block containing hint,
// extracting hint into a new block if it is not yet in one.
//
// If the hint is non-nil buts its first token does not match,
// the new line is added after the block containing hint
// (or hint itself, if not in a block).
//
// If no hint is provided, addLine appends the line to the end of
// the last block with a matching first token,
// or to the end of the file if no such block exists.
func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
	if hint == nil {
		// If no hint given, add to the last statement of the given type.
		// Scan backward so the most recent matching statement wins.
	Loop:
		for i := len(x.Stmt) - 1; i >= 0; i-- {
			stmt := x.Stmt[i]
			switch stmt := stmt.(type) {
			case *Line:
				// stmt.Token may be nil if the line was markRemoved'd.
				if stmt.Token != nil && stmt.Token[0] == tokens[0] {
					hint = stmt
					break Loop
				}
			case *LineBlock:
				if stmt.Token[0] == tokens[0] {
					hint = stmt
					break Loop
				}
			}
		}
	}

	// newLineAfter inserts a new standalone Line immediately after x.Stmt[i]
	// (or appends it when i is already the last index).
	newLineAfter := func(i int) *Line {
		new := &Line{Token: tokens}
		if i == len(x.Stmt) {
			x.Stmt = append(x.Stmt, new)
		} else {
			// Grow by one, shift the tail right, and drop new in the gap.
			x.Stmt = append(x.Stmt, nil)
			copy(x.Stmt[i+2:], x.Stmt[i+1:])
			x.Stmt[i+1] = new
		}
		return new
	}

	if hint != nil {
		for i, stmt := range x.Stmt {
			switch stmt := stmt.(type) {
			case *Line:
				if stmt == hint {
					if stmt.Token == nil || stmt.Token[0] != tokens[0] {
						return newLineAfter(i)
					}

					// Convert line to line block: the shared first token
					// becomes the block keyword, and the old line keeps
					// only its remaining tokens.
					stmt.InBlock = true
					block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
					stmt.Token = stmt.Token[1:]
					x.Stmt[i] = block
					new := &Line{Token: tokens[1:], InBlock: true}
					block.Line = append(block.Line, new)
					return new
				}
			case *LineBlock:
				if stmt == hint {
					if stmt.Token[0] != tokens[0] {
						return newLineAfter(i)
					}

					// Append to the end of the matching block.
					new := &Line{Token: tokens[1:], InBlock: true}
					stmt.Line = append(stmt.Line, new)
					return new
				}
				for j, line := range stmt.Line {
					if line == hint {
						if stmt.Token[0] != tokens[0] {
							return newLineAfter(i)
						}

						// Add new line after hint within the block.
						stmt.Line = append(stmt.Line, nil)
						copy(stmt.Line[j+2:], stmt.Line[j+1:])
						new := &Line{Token: tokens[1:], InBlock: true}
						stmt.Line[j+1] = new
						return new
					}
				}
			}
		}
	}

	// No hint (or hint not found): append a standalone line at end of file.
	new := &Line{Token: tokens}
	x.Stmt = append(x.Stmt, new)
	return new
}
// updateLine replaces the tokens of line with the given tokens.
// If the line is inside a block, the leading keyword token is dropped,
// since block lines do not repeat the block's keyword.
func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
	if line.InBlock {
		tokens = tokens[1:]
	}
	line.Token = tokens
}

// markRemoved modifies line so that it (and its end-of-line comment, if any)
// will be dropped by (*FileSyntax).Cleanup.
func (line *Line) markRemoved() {
	line.Token = nil
	line.Comments.Suffix = nil
}
// Cleanup cleans up the file syntax x after any edit operations.
// To avoid quadratic behavior, (*Line).markRemoved marks the line as dead
// by setting line.Token = nil but does not remove it from the slice
// in which it appears. After edits have all been indicated,
// calling Cleanup cleans out the dead lines.
func (x *FileSyntax) Cleanup() {
	w := 0 // write index: x.Stmt[:w] holds the surviving statements
	for _, stmt := range x.Stmt {
		switch stmt := stmt.(type) {
		case *Line:
			if stmt.Token == nil {
				continue // dead line, drop it
			}
		case *LineBlock:
			// Compact the block's lines in place, dropping dead ones.
			ww := 0
			for _, line := range stmt.Line {
				if line.Token != nil {
					stmt.Line[ww] = line
					ww++
				}
			}
			if ww == 0 {
				continue // block has no live lines, drop it entirely
			}
			if ww == 1 {
				// Collapse block into single line, merging the block's
				// comments with those of its sole surviving line.
				line := &Line{
					Comments: Comments{
						Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
						Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
						After:  commentsAdd(stmt.Line[0].After, stmt.After),
					},
					Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
				}
				x.Stmt[w] = line
				w++
				continue
			}
			stmt.Line = stmt.Line[:ww]
		}
		x.Stmt[w] = stmt
		w++
	}
	x.Stmt = x.Stmt[:w]
}
  225. func commentsAdd(x, y []Comment) []Comment {
  226. return append(x[:len(x):len(x)], y...)
  227. }
  228. func stringsAdd(x, y []string) []string {
  229. return append(x[:len(x):len(x)], y...)
  230. }
// A CommentBlock represents a top-level block of comments separate
// from any rule.
type CommentBlock struct {
	Comments
	Start Position
}

// Span reports the block's position; a comment block has zero width,
// so start and end coincide.
func (x *CommentBlock) Span() (start, end Position) {
	return x.Start, x.Start
}

// A Line is a single line of tokens.
type Line struct {
	Comments
	Start   Position
	Token   []string
	InBlock bool // line lives inside a LineBlock; Token omits the block keyword
	End     Position
}

// Span returns the recorded start and end positions of the line.
func (x *Line) Span() (start, end Position) {
	return x.Start, x.End
}
// A LineBlock is a factored block of lines, like
//
//	require (
//		"x"
//		"y"
//	)
type LineBlock struct {
	Comments
	Start  Position
	LParen LParen
	Token  []string // the keyword tokens before the '(' (e.g. ["require"])
	Line   []*Line
	RParen RParen
}

// Span runs from the keyword through the character just past the
// closing paren.
func (x *LineBlock) Span() (start, end Position) {
	return x.Start, x.RParen.Pos.add(")")
}

// An LParen represents the beginning of a parenthesized line block.
// It is a place to store suffix comments.
type LParen struct {
	Comments
	Pos Position
}

// Span covers the single paren character.
func (x *LParen) Span() (start, end Position) {
	return x.Pos, x.Pos.add(")")
}

// An RParen represents the end of a parenthesized line block.
// It is a place to store whole-line (before) comments.
type RParen struct {
	Comments
	Pos Position
}

// Span covers the single paren character.
func (x *RParen) Span() (start, end Position) {
	return x.Pos, x.Pos.add(")")
}
// An input represents a single input file being parsed.
type input struct {
	// Lexing state.
	filename   string    // name of input file, for errors
	complete   []byte    // entire input
	remaining  []byte    // remaining input
	tokenStart []byte    // token being scanned to end of input
	token      token     // next token to be returned by lex, peek
	pos        Position  // current input position
	comments   []Comment // accumulated comments

	// Parser state.
	file        *FileSyntax // returned top-level syntax tree
	parseErrors ErrorList   // errors encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}

// newInput returns an input ready to lex data, positioned at line 1, rune 1.
func newInput(filename string, data []byte) *input {
	return &input{
		filename:  filename,
		complete:  data,
		remaining: data,
		pos:       Position{Line: 1, LineRune: 1, Byte: 0},
	}
}
// parse parses the input file.
func parse(file string, data []byte) (f *FileSyntax, err error) {
	// The parser panics for both routine errors like syntax errors
	// and for programmer bugs like array index errors.
	// Turn both into error returns. Catching bug panics is
	// especially important when processing many files.
	in := newInput(file, data)
	defer func() {
		// A panic value of &in.parseErrors is the parser's own
		// controlled abort (raised by input.Error); anything else
		// is an internal bug, recorded as such.
		if e := recover(); e != nil && e != &in.parseErrors {
			in.parseErrors = append(in.parseErrors, Error{
				Filename: in.filename,
				Pos:      in.pos,
				Err:      fmt.Errorf("internal error: %v", e),
			})
		}
		if err == nil && len(in.parseErrors) > 0 {
			err = in.parseErrors
		}
	}()

	// Prime the lexer by reading in the first token. It will be available
	// in the next peek() or lex() call.
	in.readToken()

	// Invoke the parser.
	in.parseFile()
	if len(in.parseErrors) > 0 {
		return nil, in.parseErrors
	}
	in.file.Name = in.filename

	// Assign comments to nearby syntax.
	in.assignComments()

	return in.file, nil
}
// Error is called to report an error.
// Error does not return: it panics with &in.parseErrors, a sentinel
// value recognized by the recover in parse.
func (in *input) Error(s string) {
	in.parseErrors = append(in.parseErrors, Error{
		Filename: in.filename,
		Pos:      in.pos,
		Err:      errors.New(s),
	})
	panic(&in.parseErrors)
}
// eof reports whether the input has reached end of file.
func (in *input) eof() bool {
	return len(in.remaining) == 0
}

// peekRune returns the next rune in the input without consuming it.
// At end of file it returns 0.
func (in *input) peekRune() int {
	if len(in.remaining) == 0 {
		return 0
	}
	r, _ := utf8.DecodeRune(in.remaining)
	return int(r)
}
  365. // peekPrefix reports whether the remaining input begins with the given prefix.
  366. func (in *input) peekPrefix(prefix string) bool {
  367. // This is like bytes.HasPrefix(in.remaining, []byte(prefix))
  368. // but without the allocation of the []byte copy of prefix.
  369. for i := 0; i < len(prefix); i++ {
  370. if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
  371. return false
  372. }
  373. }
  374. return true
  375. }
  376. // readRune consumes and returns the next rune in the input.
  377. func (in *input) readRune() int {
  378. if len(in.remaining) == 0 {
  379. in.Error("internal lexer error: readRune at EOF")
  380. }
  381. r, size := utf8.DecodeRune(in.remaining)
  382. in.remaining = in.remaining[size:]
  383. if r == '\n' {
  384. in.pos.Line++
  385. in.pos.LineRune = 1
  386. } else {
  387. in.pos.LineRune++
  388. }
  389. in.pos.Byte += size
  390. return int(r)
  391. }
// A token is a single lexed token together with its position in the input.
type token struct {
	kind   tokenKind
	pos    Position // position where the token starts (set by startToken)
	endPos Position // position just past the token (set by endToken)
	text   string   // token text, without trailing newline for comments
}
  398. type tokenKind int
  399. const (
  400. _EOF tokenKind = -(iota + 1)
  401. _EOLCOMMENT
  402. _IDENT
  403. _STRING
  404. _COMMENT
  405. // newlines and punctuation tokens are allowed as ASCII codes.
  406. )
  407. func (k tokenKind) isComment() bool {
  408. return k == _COMMENT || k == _EOLCOMMENT
  409. }
  410. // isEOL returns whether a token terminates a line.
  411. func (k tokenKind) isEOL() bool {
  412. return k == _EOF || k == _EOLCOMMENT || k == '\n'
  413. }
// startToken marks the beginning of the next input token.
// It must be followed by a call to endToken, once the token's text has
// been consumed using readRune.
func (in *input) startToken() {
	in.tokenStart = in.remaining
	in.token.text = ""
	in.token.pos = in.pos
}

// endToken marks the end of an input token.
// It records the actual token string in tok.text.
// A single trailing newline (LF or CRLF) will be removed from comment tokens.
func (in *input) endToken(kind tokenKind) {
	in.token.kind = kind
	// The token text is everything consumed since startToken: the
	// difference between the saved slice and what now remains.
	text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
	if kind.isComment() {
		if strings.HasSuffix(text, "\r\n") {
			text = text[:len(text)-2]
		} else {
			text = strings.TrimSuffix(text, "\n")
		}
	}
	in.token.text = text
	in.token.endPos = in.pos
}
// peek returns the kind of the next token returned by lex,
// without consuming it.
func (in *input) peek() tokenKind {
	return in.token.kind
}

// lex is called from the parser to obtain the next input token.
// It returns the buffered token and reads the one after it.
func (in *input) lex() token {
	tok := in.token
	in.readToken()
	return tok
}
// readToken lexes the next token from the text and stores it in in.token.
func (in *input) readToken() {
	// Skip past spaces, stopping at non-space or EOF.
	for !in.eof() {
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' {
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if in.peekPrefix("//") {
			in.startToken()

			// Is this comment the only thing on its line?
			// Find the last \n before this // and see if it's all
			// spaces from there to here.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
			in.readRune()
			in.readRune()

			// Consume comment.
			for len(in.remaining) > 0 && in.readRune() != '\n' {
			}

			// If we are at top level (not in a statement), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			if !suffix {
				in.endToken(_COMMENT)
				return
			}

			// Otherwise, save comment for later attachment to syntax tree.
			in.endToken(_EOLCOMMENT)
			in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
			return
		}

		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}

		// Found non-space non-comment.
		break
	}

	// Found the beginning of the next token.
	in.startToken()

	// End of file.
	if in.eof() {
		in.endToken(_EOF)
		return
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '\n', '(', ')', '[', ']', '{', '}', ',':
		// Single-character tokens carry their ASCII code as their kind.
		in.readRune()
		in.endToken(tokenKind(c))
		return

	case '"', '`': // quoted string
		quote := c
		in.readRune()
		for {
			if in.eof() {
				in.pos = in.token.pos
				in.Error("unexpected EOF in string")
			}
			if in.peekRune() == '\n' {
				in.Error("unexpected newline in string")
			}
			c := in.readRune()
			if c == quote {
				break
			}
			// Backslash escapes the next rune in interpreted strings,
			// but not inside raw backquote strings.
			if c == '\\' && quote != '`' {
				if in.eof() {
					in.pos = in.token.pos
					in.Error("unexpected EOF in string")
				}
				in.readRune()
			}
		}
		in.endToken(_STRING)
		return
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over identifier.
	for isIdent(in.peekRune()) {
		if in.peekPrefix("//") {
			break
		}
		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}
		in.readRune()
	}
	in.endToken(_IDENT)
}
  543. // isIdent reports whether c is an identifier rune.
  544. // We treat most printable runes as identifier runes, except for a handful of
  545. // ASCII punctuation characters.
  546. func isIdent(c int) bool {
  547. switch r := rune(c); r {
  548. case ' ', '(', ')', '[', ']', '{', '}', ',':
  549. return false
  550. default:
  551. return !unicode.IsSpace(r) && unicode.IsPrint(r)
  552. }
  553. }
// Comment assignment.
// We build two lists of all subexpressions, preorder and postorder.
// The preorder list is ordered by start location, with outer expressions first.
// The postorder list is ordered by end location, with outer expressions last.
// We use the preorder list to assign each whole-line comment to the syntax
// immediately following it, and we use the postorder list to assign each
// end-of-line comment to the syntax immediately preceding it.

// order walks the expression adding it and its subexpressions to the
// preorder and postorder lists.
func (in *input) order(x Expr) {
	if x != nil {
		in.pre = append(in.pre, x)
	}
	switch x := x.(type) {
	default:
		// Any new Expr implementation must be handled here explicitly.
		panic(fmt.Errorf("order: unexpected type %T", x))
	case nil:
		// nothing
	case *LParen, *RParen:
		// nothing
	case *CommentBlock:
		// nothing
	case *Line:
		// nothing
	case *FileSyntax:
		for _, stmt := range x.Stmt {
			in.order(stmt)
		}
	case *LineBlock:
		// Visit the parens as their own nodes so suffix/before
		// comments can attach to them.
		in.order(&x.LParen)
		for _, l := range x.Line {
			in.order(l)
		}
		in.order(&x.RParen)
	}
	if x != nil {
		in.post = append(in.post, x)
	}
}
// assignComments attaches comments to nearby syntax.
func (in *input) assignComments() {
	const debug = false

	// Generate preorder and postorder lists.
	in.order(in.file)

	// Split into whole-line comments and suffix comments.
	var line, suffix []Comment
	for _, com := range in.comments {
		if com.Suffix {
			suffix = append(suffix, com)
		} else {
			line = append(line, com)
		}
	}

	if debug {
		for _, c := range line {
			fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign line comments to syntax immediately following.
	// in.pre is ordered by start byte, so consume the comment list in order.
	for _, x := range in.pre {
		start, _ := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
		}
		xcom := x.Comment()
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
			}
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at end of file.
	in.file.After = append(in.file.After, line...)

	if debug {
		for _, c := range suffix {
			fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign suffix comments to syntax immediately before.
	// Walk in.post backward, consuming the suffix list from its end.
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		start, end := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
		}

		// Do not assign suffix comments to end of line block or whole file.
		// Instead assign them to the last element inside.
		switch x.(type) {
		case *FileSyntax:
			continue
		}

		// Do not assign suffix comments to something that starts
		// on an earlier line, so that in
		//
		//	x ( y
		//		z ) // comment
		//
		// we assign the comment to z and not to x ( ... ).
		if start.Line != end.Line {
			continue
		}
		xcom := x.Comment()
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
			}
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// We assigned suffix comments in reverse.
	// If multiple suffix comments were appended to the same
	// expression node, they are now in reverse. Fix that.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at beginning of file.
	in.file.Before = append(in.file.Before, suffix...)
}
  675. // reverseComments reverses the []Comment list.
  676. func reverseComments(list []Comment) {
  677. for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
  678. list[i], list[j] = list[j], list[i]
  679. }
  680. }
// parseFile parses the whole input as a sequence of top-level
// statements and standalone comment blocks, filling in in.file.
func (in *input) parseFile() {
	in.file = new(FileSyntax)
	var cb *CommentBlock // run of whole-line comments not yet attached
	for {
		switch in.peek() {
		case '\n':
			// A blank line terminates a pending comment block,
			// making it a standalone statement.
			in.lex()
			if cb != nil {
				in.file.Stmt = append(in.file.Stmt, cb)
				cb = nil
			}
		case _COMMENT:
			tok := in.lex()
			if cb == nil {
				cb = &CommentBlock{Start: tok.pos}
			}
			com := cb.Comment()
			com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
		case _EOF:
			if cb != nil {
				in.file.Stmt = append(in.file.Stmt, cb)
			}
			return
		default:
			// A statement: any pending comments become its Before list.
			in.parseStmt()
			if cb != nil {
				in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
				cb = nil
			}
		}
	}
}
// parseStmt parses one top-level statement, appending either a Line or
// (when a '(' ends the keyword line) a LineBlock to in.file.Stmt.
func (in *input) parseStmt() {
	tok := in.lex()
	start := tok.pos
	end := tok.endPos
	tokens := []string{tok.text}
	for {
		tok := in.lex()
		switch {
		case tok.kind.isEOL():
			in.file.Stmt = append(in.file.Stmt, &Line{
				Start: start,
				Token: tokens,
				End:   end,
			})
			return

		case tok.kind == '(':
			if next := in.peek(); next.isEOL() {
				// Start of block: no more tokens on this line.
				in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
				return
			} else if next == ')' {
				rparen := in.lex()
				if in.peek().isEOL() {
					// Empty block.
					in.lex()
					in.file.Stmt = append(in.file.Stmt, &LineBlock{
						Start:  start,
						Token:  tokens,
						LParen: LParen{Pos: tok.pos},
						RParen: RParen{Pos: rparen.pos},
					})
					return
				}
				// '( )' in the middle of the line, not a block.
				tokens = append(tokens, tok.text, rparen.text)
			} else {
				// '(' in the middle of the line, not a block.
				tokens = append(tokens, tok.text)
			}

		default:
			tokens = append(tokens, tok.text)
			end = tok.endPos
		}
	}
}
// parseLineBlock parses the interior of a parenthesized block through the
// closing ')' and its end of line. start and token describe the keyword
// line that opened the block; lparen is the '(' token itself.
func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
	x := &LineBlock{
		Start:  start,
		Token:  token,
		LParen: LParen{Pos: lparen.pos},
	}
	var comments []Comment // whole-line comments awaiting the next line
	for {
		switch in.peek() {
		case _EOLCOMMENT:
			// Suffix comment, will be attached later by assignComments.
			in.lex()
		case '\n':
			// Blank line. Add an empty comment to preserve it.
			// Only record one: skip if the previous entry is already empty
			// or if the block has no content yet.
			in.lex()
			if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
				comments = append(comments, Comment{})
			}
		case _COMMENT:
			tok := in.lex()
			comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
		case _EOF:
			in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
		case ')':
			rparen := in.lex()
			// Pending comments belong to the closing paren.
			x.RParen.Before = comments
			x.RParen.Pos = rparen.pos
			if !in.peek().isEOL() {
				in.Error("syntax error (expected newline after closing paren)")
			}
			in.lex()
			return x
		default:
			l := in.parseLine()
			x.Line = append(x.Line, l)
			l.Comment().Before = comments
			comments = nil
		}
	}
}
// parseLine parses one line inside a block, consuming tokens through the
// end of the line and returning the resulting Line with InBlock set.
func (in *input) parseLine() *Line {
	tok := in.lex()
	if tok.kind.isEOL() {
		in.Error("internal parse error: parseLine at end of line")
	}
	start := tok.pos
	end := tok.endPos
	tokens := []string{tok.text}
	for {
		tok := in.lex()
		if tok.kind.isEOL() {
			return &Line{
				Start:   start,
				Token:   tokens,
				End:     end,
				InBlock: true,
			}
		}
		tokens = append(tokens, tok.text)
		end = tok.endPos
	}
}
  820. var (
  821. slashSlash = []byte("//")
  822. moduleStr = []byte("module")
  823. )
  824. // ModulePath returns the module path from the gomod file text.
  825. // If it cannot find a module path, it returns an empty string.
  826. // It is tolerant of unrelated problems in the go.mod file.
  827. func ModulePath(mod []byte) string {
  828. for len(mod) > 0 {
  829. line := mod
  830. mod = nil
  831. if i := bytes.IndexByte(line, '\n'); i >= 0 {
  832. line, mod = line[:i], line[i+1:]
  833. }
  834. if i := bytes.Index(line, slashSlash); i >= 0 {
  835. line = line[:i]
  836. }
  837. line = bytes.TrimSpace(line)
  838. if !bytes.HasPrefix(line, moduleStr) {
  839. continue
  840. }
  841. line = line[len(moduleStr):]
  842. n := len(line)
  843. line = bytes.TrimSpace(line)
  844. if len(line) == n || len(line) == 0 {
  845. continue
  846. }
  847. if line[0] == '"' || line[0] == '`' {
  848. p, err := strconv.Unquote(string(line))
  849. if err != nil {
  850. return "" // malformed quoted string or multiline module path
  851. }
  852. return p
  853. }
  854. return string(line)
  855. }
  856. return "" // missing module path
  857. }