summaryrefslogtreecommitdiff
path: root/util/file_types.py
blob: 3315a747fd1173b891998063d2535e55a8f73d49 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os

# language type for each file extension.  Keys include the leading dot
# and are looked up case-sensitively, so '.c'/'.h' are C while
# '.C'/'.H' are C++.
lang_types = {
    '.c'     : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    }

# languages based on file name prefix, as (prefix, language) pairs
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )

# languages based on the #! first line of the file, as
# (substring, language) pairs
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )

# the set of all languages that we detect, collected from the three
# tables above.  dict.values() is used rather than itervalues() so the
# module can be imported under both Python 2 and Python 3.
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for _prefix, lang in lang_prefixes)
all_languages |= frozenset(lang for _marker, lang in hash_bang)

def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file from its name and, if needed,
    its first line.

    Detection is attempted in this order, returning on the first match:
    the file extension (lang_types), a prefix of the base name
    (lang_prefixes), and finally a #! first line (hash_bang).

    filename  -- path of the file; only its base name is used for the
                 extension and prefix checks.
    firstline -- first line of the file, if the caller already has it.
    openok    -- when True and firstline is None, the file is opened
                 and its first line is read, but only after the
                 extension and prefix checks have both failed.

    Returns the language name as a string, or None if nothing matched.
    Errors raised while opening or reading the file are not caught.
    '''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    lang = lang_types.get(extension)
    if lang is not None:
        return lang

    # now try to detect language based on file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            # return the language name, not the prefix that matched
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  The with block closes the
    # handle even if reading fails.
    if firstline is None and openok:
        with open(filename, 'r') as handle:
            firstline = handle.readline()

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if string in firstline:
                return lang

    # sorry, we couldn't detect the language
    return None