Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Windows & cl.exe/clang-cl.exe #128

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 56 additions & 8 deletions compiledb/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@
import bashlex
import re
import logging
import sys

from compiledb.compiler import get_compiler
from compiledb.utils import run_cmd

# Internal variables used to parse build log entries
cc_compile_regex = re.compile(r"^.*-?g?cc-?[0-9.]*$|^.*-?clang-?[0-9.]*$")
cpp_compile_regex = re.compile(r"^.*-?[gc]\+\+-?[0-9.]*$|^.*-?clang\+\+-?[0-9.]*$")
# Compiler-name patterns extended with the MSVC-style drivers `cl` and
# `clang-cl`, each with an optional `.exe` suffix.  Every alternative is
# separated by `|`, and the `l` of `cl` is mandatory so that tokens that merely
# end in "c" (e.g. "main.c") are not mistaken for a compiler.
cc_compile_regex = re.compile(r"^.*-?g?cc-?[0-9.]*$|^.*-?clang-?[0-9.]*$|^.*-?(clang-)?cl(\.exe)?$")
cpp_compile_regex = re.compile(r"^.*-?[gc]\+\+-?[0-9.]*$|^.*-?clang\+\+-?[0-9.]*$|^.*-?(clang-)?cl(\.exe)?$")
# Source files recognised by extension (case-insensitive).
file_regex = re.compile(r"^.+\.c$|^.+\.cc$|^.+\.cpp$|^.+\.cxx$|^.+\.s$", re.IGNORECASE)
# Launcher names that may precede the real compiler on a command line.
compiler_wrappers = {"ccache", "icecc", "sccache"}

Expand Down Expand Up @@ -137,7 +138,7 @@ def skip_line(cmd, reason):

# add entry to database
tokens = c['tokens']
arguments = [unescape(a) for a in tokens[len(wrappers):]]
arguments = [a for a in tokens[len(wrappers):]]

compiler = get_compiler(arguments[0])

Expand Down Expand Up @@ -178,12 +179,63 @@ def visitcommandsubstitution(self, n, cmd):
self.substs.append(n)
return False

def cross_platform_argline(s, platform='this'):
    """Multi-platform variant of shlex.split() for command-line splitting.

    The line is tokenised with a single regular expression chosen by
    *platform*.  Adjacent pieces that are not separated by whitespace or a
    shell operator are concatenated into one argument.

    Args:
        s: the command line to split.
        platform: 'this' = auto-detect from ``sys.platform``;
            1 = POSIX;
            0 = Windows/CMD
            (other values reserved).

    Returns:
        A list of strings.  Shell operators (``&&``, ``||``, ``|``, ``&``,
        redirections) are returned as separate items.  Double-quoted segments
        are returned wrapped in quote characters so they remain a single token
        when the items are joined with spaces and parsed again.  POSIX
        single-quoted segments are returned without their quotes.  On Windows,
        backslashes in unquoted words are normalised to doubled backslashes.

    Raises:
        AssertionError: if *platform* is not 'this', 1 or 0.
        ValueError: if the line contains text the lexer cannot match, e.g.
            an unterminated quote on POSIX.

    Kudos: https://stackoverflow.com/a/35900070/2349761
    """
    if platform == 'this':
        # bool result; True == 1 (POSIX) and False == 0 (Windows) below
        platform = (sys.platform != 'win32')
    if platform == 1:
        RE_CMD_LEX = r'''"((?:\\["\\]|[^"])*)"|'([^']*)'|(\\.)|(&&?|\|\|?|\d?\>|[<])|([^\s'"\\&|<>]+)|(\s+)|(.)'''
    elif platform == 0:
        RE_CMD_LEX = r'''"((?:""|\\["\\]|[^"])*)"?()|(\\\\(?=\\*")|\\")|(&&?|\|\|?|\d?>|[<])|([^\s"&|<>]+)|(\s+)|(.)'''
    else:
        raise AssertionError('unkown platform %r' % platform)

    args = []
    accu = None  # collects pieces of one arg
    for qs, qss, esc, pipe, word, white, fail in re.findall(RE_CMD_LEX, s):
        if word:
            if platform == 0:
                # collapse any already-doubled backslashes first so that every
                # backslash ends up doubled exactly once
                word = word.replace('\\\\', '\\').replace('\\', '\\\\')
        elif esc:
            # keep only the escaped character, dropping the backslash
            word = esc[1]
        elif white or pipe:
            # whitespace or an operator terminates the current argument
            if accu is not None:
                args.append(accu)
            if pipe:
                args.append(pipe)
            accu = None
            continue
        elif fail:
            raise ValueError("invalid or incomplete shell string")
        elif qs:
            if platform == 0:
                # inside a Windows quoted string, "" stands for one literal "
                qs = qs.replace('""', '"')
            # Re-quote the segment using the same quote choice repr() makes:
            # single quotes unless the text contains ' but no ".  Building it
            # directly avoids an encode/decode round trip that would corrupt
            # non-ASCII characters.
            quote = '"' if ("'" in qs and '"' not in qs) else "'"
            word = quote + qs + quote
        else:
            word = qss  # may be even empty; must be last

        accu = (accu or '') + word

    if accu is not None:
        args.append(accu)

    return args

class CommandProcessor(bashlex.ast.nodevisitor):
"""Uses bashlex to parse and traverse the resulting bash AST
looking for and extracting compilation commands."""
@staticmethod
def process(line, wd):
args = cross_platform_argline(line)
line = " ".join(args)
trees = bashlex.parser.parse(line)
if not trees:
return []
Expand Down Expand Up @@ -254,8 +306,4 @@ def check_last_cmd(self):
# reset state to process new command
self.reset()


def unescape(s):
    """Return *s* with Python-style backslash escape sequences expanded.

    The text is encoded as Latin-1 with ``backslashreplace`` before being
    decoded with ``unicode_escape``.  That codec reads its input bytes as
    Latin-1, so encoding as UTF-8 would turn every non-ASCII character into
    mojibake; with this encoding, Latin-1 characters pass through unchanged
    and all other characters become ``\\uXXXX`` escapes that the decoder
    restores.
    """
    return s.encode('latin-1', 'backslashreplace').decode('unicode_escape')

# ex: ts=2 sw=4 et filetype=python