import string
import os.path
import sys
import import_re
import glob
from Util import *
# Namespace used by _exec_py() as globals and locals when running the Python
# detection snippets.  It is a snapshot of the module globals taken right
# here, thus it holds the names imported above but none of the definitions
# that follow.
exec_recdict = globals().copy()
# Set to 1 when this file is run as a program; ft_check_dir() then prints
# errors instead of passing them to msg_error().
_run_as_program = 0
# Default detection by file name suffix: (suffix, filetype) pairs.
# The suffix is the text after a dot in the file name, WITHOUT that dot; it
# may itself contain a dot ("tar.gz").  The type "ignore" means the suffix is
# stripped and detection is done on the remaining name (see ft_detect()).
# Suffixes are case sensitive here.
_def_suffix_list = [
    ("aap", "aap"),
    ("abc", "abc"),
    ("abl", "abel"),
    ("wrm", "acedb"),
    ("ada", "ada"),
    ("adb", "ada"),
    ("ads", "ada"),
    ("afd", "afd"),
    ("tdf", "ahdl"),
    ("aml", "aml"),
    ("run", "ampl"),
    ("a", "asm"),
    ("asm", "asm"),
    ("lst", "asm"),
    ("mac", "asm"),
    ("s", "asm"),
    ("asn", "asn"),
    ("asn1", "asn"),
    ("asa", "aspvbs"),
    ("as", "atlas"),
    ("atl", "atlas"),
    ("ave", "ave"),
    ("awk", "awk"),
    ("imp", "b"),
    ("mch", "b"),
    ("ref", "b"),
    ("bc", "bc"),
    ("bdf", "bdf"),
    ("bib", "bib"),
    ("bl", "blank"),
    ("btm", "btm"),
    ("c", "c"),
    ("cdl", "cdl"),
    ("cfi", "cf"),
    ("cfm", "cf"),
    ("chs", "chaskell"),
    ("eni", "cl"),
    ("dcl", "clean"),
    ("icl", "clean"),
    ("prg", "clipper"),
    ("cbl", "cobol"),
    ("cob", "cobol"),
    ("cpy", "cobol"),
    ("c++", "cpp"),
    ("cc", "cpp"),
    ("cpp", "cpp"),
    ("cxx", "cpp"),
    ("h", "cpp"),
    ("hh", "cpp"),
    ("hpp", "cpp"),
    ("hxx", "cpp"),
    ("inl", "cpp"),
    ("tcc", "cpp"),
    ("cs", "cs"),
    ("csc", "csc"),
    ("csh", "csh"),
    ("tcsh", "csh"),
    ("csp", "csp"),
    ("fdr", "csp"),
    ("css", "css"),
    ("con", "cterm"),
    ("pld", "cupl"),
    ("si", "cuplsim"),
    ("cyn", "cynpp"),
    ("d", "d"),
    ("def", "def"),
    ("desc", "desc"),
    ("diff", "diff"),
    ("patch", "diff"),
    ("rej", "diff"),
    ("bat", "dosbatch"),
    ("cmd", "dosbatch"),
    ("sys", "dosbatch"),
    ("ini", "dosini"),
    ("dot", "dot"),
    ("drac", "dracula"),
    ("drc", "dracula"),
    ("dsl", "dsl"),
    ("dtd", "dtd"),
    ("dylan", "dylan"),
    ("intr", "dylanintr"),
    ("lid", "dylanlid"),
    ("ecd", "ecd"),
    ("am", "elf"),
    ("erl", "erlang"),
    ("EC", "esqlc"),
    ("ec", "esqlc"),
    ("exp", "expect"),
    ("4gh", "fgl"),
    ("4gl", "fgl"),
    ("m4gl", "fgl"),
    ("fex", "focexec"),
    ("focexec", "focexec"),
    ("fs", "forth"),
    ("ft", "forth"),
    ("F", "fortran"),
    ("f", "fortran"),
    ("f77", "fortran"),
    ("f90", "fortran"),
    ("f95", "fortran"),
    ("for", "fortran"),
    ("fpp", "fortran"),
    ("ftn", "fortran"),
    ("gdmo", "gdmo"),
    ("mo", "gdmo"),
    ("ged", "gedcom"),
    ("gif", "gif"),
    ("gpi", "gnuplot"),
    ("gp", "gp"),
    ("gsp", "gsp"),
    ("hs", "haskell"),
    ("hb", "hb"),
    ("errsum", "hercules"),
    ("ev", "hercules"),
    ("rs", "hercules"),
    ("sum", "hercules"),
    ("vc", "hercules"),
    ("h32", "hex"),
    ("hex", "hex"),
    ("hog", "hog"),
    ("rules", "hog"),
    ("htm", "html"),
    ("html", "html"),
    ("shtml", "html"),
    ("html.m4", "htmlm4"),
    ("icn", "icon"),
    ("idl", "idl"),
    ("Z", "ignore"),
    ("bak", "ignore"),
    ("bz2", "ignore"),
    ("gz", "ignore"),
    ("in", "ignore"),
    ("new", "ignore"),
    ("old", "ignore"),
    ("orig", "ignore"),
    ("rpmnew", "ignore"),
    ("rpmsave", "ignore"),
    ("indent.pro", "indent"),
    ("INF", "inform"),
    ("inf", "inform"),
    ("iss", "iss"),
    ("ist", "ist"),
    ("mst", "ist"),
    ("jpl", "jam"),
    ("jpr", "jam"),
    ("jav", "java"),
    ("java", "java"),
    ("jj", "javacc"),
    ("jjt", "javacc"),
    ("javascript", "javascript"),
    ("js", "javascript"),
    ("clp", "jess"),
    ("jgr", "jgraph"),
    ("jpg", "jpeg"),
    ("png", "png"),
    ("properties", "jproperties"),
    ("jsp", "jsp"),
    ("kix", "kix"),
    ("ks", "kscript"),
    ("k", "kwt"),
    ("ACE", "lace"),
    ("ace", "lace"),
    ("latte", "latte"),
    ("lte", "latte"),
    ("l", "lex"),
    ("lex", "lex"),
    ("lhs", "lhaskell"),
    ("ll", "lexpp"),
    ("cl", "lisp"),
    ("el", "lisp"),
    ("jl", "lisp"),
    ("lisp", "lisp"),
    ("lsp", "lisp"),
    ("lite", "lite"),
    ("lt", "lite"),
    ("lgt", "logtalk"),
    ("lot", "lotos"),
    ("lotos", "lotos"),
    ("lou", "lout"),
    ("lout", "lout"),
    ("sig", "lprolog"),
    ("lss", "lss"),
    ("lua", "lua"),
    ("mc", "m4"),
    ("eml", "mail"),
    ("dsp", "make"),
    ("mak", "make"),
    ("mk", "make"),
    ("man", "man"),
    ("mpl", "maple"),
    ("mv", "maple"),
    ("mws", "maple"),
    ("mason", "mason"),
    ("mhtml", "mason"),
    ("mel", "mel"),
    ("mf", "mf"),
    ("mgp", "mgp"),
    ("mib", "mib"),
    ("mms", "mmix"),
    ("moc", "moc"),
    ("DEF", "modula2"),
    ("MOD", "modula2"),
    ("m2", "modula2"),
    ("md", "modula2"),
    ("mi", "modula2"),
    ("i3", "modula3"),
    ("ig", "modula3"),
    ("m3", "modula3"),
    ("mg", "modula3"),
    ("isc", "monk"),
    ("monk", "monk"),
    ("ssc", "monk"),
    ("tsc", "monk"),
    ("moo", "moo"),
    ("mp", "mp"),
    ("msql", "msql"),
    ("mush", "mush"),
    ("mysql", "mysql"),
    # No leading dot: the lookup key is the text after the dot.
    ("NSA", "natural"),
    ("NSC", "natural"),
    ("NSG", "natural"),
    ("NSL", "natural"),
    ("NSM", "natural"),
    ("NSN", "natural"),
    ("NSP", "natural"),
    ("NSS", "natural"),
    ("ncf", "ncf"),
    ("nqc", "nqc"),
    ("OPL", "opl"),
    ("OPl", "opl"),
    ("Opl", "opl"),
    ("dpr", "pascal"),
    ("g", "pccts"),
    ("inc", "php"),
    ("ml", "ocaml"),
    ("mli", "ocaml"),
    ("mll", "ocaml"),
    ("mly", "ocaml"),
    ("mm", "nroff"),
    ("nr", "nroff"),
    ("nsi", "nsis"),
    ("o", "object"),
    ("obj", "object"),
    ("opl", "opl"),
    ("or", "openroad"),
    ("ora", "ora"),
    ("papp", "papp"),
    ("pas", "pascal"),
    ("php", "php"),
    ("pl", "perl"),
    ("pxml", "papp"),
    ("pxsl", "papp"),
    ("roff", "nroff"),
    ("sho", "dllobject"),
    ("sob", "dllobject"),
    ("tr", "nroff"),
    ("xin", "omnimark"),
    ("xom", "omnimark"),
    ("php3", "php"),
    ("phtml", "phtml"),
    ("lpc", "pike"),
    ("pike", "pike"),
    ("pmod", "pike"),
    ("ulpc", "pike"),
    ("rcp", "pilrc"),
    ("p36", "plm"),
    ("pac", "plm"),
    ("plm", "plm"),
    ("plp", "plp"),
    ("pls", "plsql"),
    ("plsql", "plsql"),
    ("po", "po"),
    ("pod", "pod"),
    ("eps", "postscript"),
    ("ps", "postscript"),
    ("pov", "pov"),
    ("ppd", "ppd"),
    ("ih", "ppwiz"),
    ("it", "ppwiz"),
    ("pdb", "prolog"),
    ("psf", "psf"),
    ("py", "python"),
    ("mat", "radiance"),
    ("rad", "radiance"),
    ("rc", "rc"),
    ("rex", "rexx"),
    ("rexx", "rexx"),
    ("x", "rpcgen"),
    ("rpl", "rpl"),
    ("rtf", "rtf"),
    ("rb", "ruby"),
    ("rbw", "ruby"),
    ("sas", "sas"),
    ("sa", "sather"),
    ("scm", "scheme"),
    ("sci", "scilab"),
    ("pdl", "sdl"),
    ("pr", "sdl"),
    ("sed", "sed"),
    ("sgm", "sgml"),
    ("sgml", "sgml"),
    ("bash", "sh"),
    ("ebuild", "sh"),
    ("env", "sh"),
    ("ksh", "sh"),
    ("sh", "sh"),
    ("sim", "simula"),
    ("s85", "sinda"),
    ("sin", "sinda"),
    ("il", "skill"),
    ("sl", "slang"),
    ("score", "slrnsc"),
    ("tpl", "smarty"),
    ("smith", "smith"),
    ("smt", "smith"),
    ("sml", "sml"),
    ("sno", "snobol4"),
    ("spec", "spec"),
    ("sp", "spice"),
    ("spice", "spice"),
    ("spd", "spup"),
    ("spdata", "spup"),
    ("speedup", "spup"),
    ("pkb", "sql"),
    ("pks", "sql"),
    ("sql", "sql"),
    ("tyb", "sql"),
    ("tyc", "sql"),
    ("typ", "sql"),
    ("sqlj", "sqlj"),
    ("sqi", "sqr"),
    ("sqr", "sqr"),
    ("s19", "srec"),
    ("s28", "srec"),
    ("s37", "srec"),
    ("cls", "st"),
    ("st", "st"),
    ("stp", "stp"),
    ("tak", "tak"),
    ("itcl", "tcl"),
    ("itk", "tcl"),
    ("tar", "tar"),
    ("tar.bz2", "tarbz2"),
    ("tar.gz", "targz"),
    ("tgz", "targz"),
    ("tcl", "tcl"),
    ("tk", "tcl"),
    ("ti", "terminfo"),
    ("dtx", "tex"),
    ("latex", "tex"),
    ("ltx", "tex"),
    ("sty", "tex"),
    ("tex", "tex"),
    ("texi", "texinfo"),
    ("texinfo", "texinfo"),
    ("txi", "texinfo"),
    ("tf", "tf"),
    ("t.html", "tilde"),
    ("tli", "tli"),
    ("slt", "tsalt"),
    ("tsscl", "tsscl"),
    ("tssgm", "tssgm"),
    ("tssop", "tssop"),
    ("uc", "uc"),
    ("ui", "ui"),
    ("uil", "uil"),
    ("uit", "uil"),
    ("ctl", "vb"),
    ("dsm", "vb"),
    ("sba", "vb"),
    ("vbs", "vb"),
    ("v", "verilog"),
    ("hdl", "vhdl"),
    ("vbe", "vhdl"),
    ("vhd", "vhdl"),
    ("vhdl", "vhdl"),
    ("vst", "vhdl"),
    ("vim", "vim"),
    ("hw", "virata"),
    ("module", "virata"),
    ("pkg", "virata"),
    ("wrl", "vrml"),
    ("wm", "webmacro"),
    ("wbt", "winbatch"),
    ("wml", "wml"),
    ("doc", "word"),
    ("wsc", "wsh"),
    ("wsf", "wsh"),
    ("ad", "xdefaults"),
    ("msc", "xmath"),
    ("msf", "xmath"),
    ("xpm2", "xpm2"),
    ("xs", "xs"),
    ("xsd", "xsd"),
    ("xsl", "xslt"),
    ("y", "yacc"),
    ("yy", "yaccpp"),
    ("zip", "zip"),
    ("z8a", "z8a"),
    ]
# Default detection by matching the file name with a regexp:
# (regexp, filetype, tail) tuples.  When "tail" is non-zero the regexp is
# matched against the last component of the path, otherwise against the whole
# name.  Matching is done with match(), thus anchored at the start.
# _add_regexplist() inserts every rule at the front of the rule list, thus
# rules further down here are tried first.
# Note: a backslash must only be used where a regexp escape is intended;
# "\\tidyrc" would match a TAB followed by "idyrc".
_def_regexp_list = [
    ("[cC]hange[lL]og", "changelog", 1),
    ("/var/named/", "bindzone", 0),
    ("crontab", "crontab", 1),
    (".*drac\\.", "dracula", 0),
    (".*fvwmrc", "fvwm", 0),
    (".*fvwm95", "fvwm", 0),
    (".*fvwm2rc", "fvwm", 0),
    ("\\.gtkrc", "gtkrc", 1),
    ("gtkrc", "gtkrc", 1),
    ("Prl.*\\.", "jam", 1),
    ("JAM.*\\.", "jam", 1),
    ("[mM]akefile", "make", 1),
    ("muttrc", "muttrc", 1),
    ("tmac\\.", "nroff", 1),
    (".*printcap", "printcap", 0),
    (".*termcap", "termcap", 0),
    (".*vimrc", "vim", 0),
    ("Xresources", "xdefaults", 1),
    (".*/app-defaults/", "xdefaults", 0),
    (".*/Xresources/", "xdefaults", 0),
    ("XF86Config", "xf86conf", 1),
    (".*xmodmap", "xmodmap", 0),
    ("zsh", "zsh", 1),
    ("zlog", "zsh", 1),
    ("xdm-config$", "xdefaults", 1),
    ("\\.Xresources$", "xdefaults", 1),
    ("\\.Xpdefaults$", "xdefaults", 1),
    ("\\.Xdefaults$", "xdefaults", 1),
    ("XF86Config$", "xf86conf", 1),
    ("cvs\\d+$", "cvs", 1),
    ("wvdial\\.conf$", "wvdial", 1),
    ("wgetrc$", "wget", 1),
    ("\\.wgetrc$", "wget", 1),
    ("vgrindefs$", "vgrindefs", 1),
    ("\\.viminfo", "viminfo", 1),
    ("_viminfo", "viminfo", 1),
    (".*\\.vhdl_[0-9]*$", "vhdl", 0),
    ("tidyrc$", "tidy", 1),
    ("\\.tidyrc$", "tidy", 1),
    ("texmf\\.cnf$", "texmf", 1),
    ("tags$", "tags", 1),
    ("squid\\.conf$", "squid", 1),
    ("vision\\.conf$", "hog", 1),
    ("snort\\.conf$", "hog", 1),
    ("\\.slrnrc", "slrnrc", 1),
    ("screenrc$", "screen", 1),
    ("\\.screenrc$", "screen", 1),
    ("\\.zcompdump", "zsh", 1),
    ("\\.zfbfmarks$", "zsh", 1),
    ("\\.zprofile$", "zsh", 1),
    ("\\.zlog", "zsh", 1),
    ("\\.zsh", "zsh", 1),
    ("csh\\.logout$", "csh", 1),
    ("csh\\.login$", "csh", 1),
    ("csh\\.cshrc$", "csh", 1),
    ("\\.alias", "csh", 1),
    ("\\.tcshrc", "csh", 1),
    ("\\.cshrc", "csh", 1),
    ("\\.login", "csh", 1),
    ("\\.profile", "sh", 1),
    ("/etc/profile", "sh", 0),
    ("\\.kshrc", "sh", 1),
    ("\\.bashrc", "sh", 1),
    ("bashrc", "sh", 1),
    ("bash\\.bashrc", "sh", 1),
    ("\\.bash_profile", "sh", 1),
    ("\\.bash_logout", "sh", 1),
    ("sgml\\.catalog", "catalog", 1),
    ("catalog$", "catalog", 1),
    ("sendmail\\.cf", "sendmail", 1),
    ("smb\\.conf", "samba", 1),
    ("robots.txt", "robots", 1),
    ("\\.reminders", "remind", 1),
    ("\\.inputrc$", "readline", 1),
    ("\\.ratpoisonrc$", "ratpoison", 1),
    ("ratpoisonrc$", "ratpoison", 1),
    ("\\.procmail$", "procmail", 1),
    ("\\.procmailrc$", "procmail", 1),
    (".*printcap$", "printcap", 0),
    (".*termcap$", "termcap", 0),
    ("\\.povrayrc$", "povini", 1),
    ("main.cf$", "pfmain", 1),
    ("\\.pinerc$", "pine", 1),
    ("pinerc$", "pine", 1),
    ("\\.muttrc", "muttrc", 1),
    ("\\.mutt/muttrc", "muttrc", 1),
    ("Muttrc$", "muttrc", 1),
    ("[mM]akefile$", "make", 1),
    ("GNUmakefile$", "make", 1),
    ("snd.\\d+$", "mail", 1),
    ("\\.letter$", "mail", 1),
    ("\\.letter\\.\\d+$", "mail", 1),
    ("\\.followup$", "mail", 1),
    ("\\.article$", "mail", 1),
    ("\\.article\\.\\d+$", "mail", 1),
    ("pico\\.\\d+$", "mail", 1),
    ("mutt-.*-\\d+$", "mail", 1),
    ("mutt\\w{6}$", "mail", 1),
    ("ae\\d+\\.txt$", "mail", 1),
    ("/tmp/SLRN[0-9A-Z.]+$", "mail", 0),
    ("\\.emacs$", "lisp", 1),
    ("\\.sawfishrc$", "lisp", 1),
    ("lilo.conf", "lilo", 1),
    ("lftp.conf$", "lftp", 1),
    ("\\.lftprc$", "lftp", 1),
    (".*lftp/rc$", "lftp", 0),
    (".*properties_..$", "jproperties", 0),
    (".*properties_.._..$", "jproperties", 0),
    (".*properties_.._.._.*$", "jproperties", 0),
    ("inittab$", "inittab", 1),
    ("\\.gtkrc$", "gtkrc", 1),
    ("gtkrc$", "gtkrc", 1),
    ("gkrellmrc_.$", "gkrellmrc", 1),
    ("gkrellmrc$", "gkrellmrc", 1),
    ("\\.gdbinit$", "gdb", 1),
    ("fstab$", "fstab", 1),
    ("auto.master$", "conf", 1),
    ("exports$", "exports", 1),
    ("filter-rules$", "elmfilt", 1),
    (".*lvs$", "dracula", 0),
    (".*lpe$", "dracula", 0),
    ("debian/control$", "debcontrol", 1),
    (".*\\.\\.ch$", "ch", 0),
    ("named\\.conf$", "named", 1),
    ("named\\.root$", "bindzone", 1),
    ("build\\.xml$", "ant", 1),
    (".*vimrc$", "vim", 0),
    (".*exrc$", "vim", 0),
    ("configure$", "sh", 1),
    ("configure.ac$", "config", 1),
    (".*COPYING$", "text", 0),
    (".*README$", "text", 0),
    (".*read.me$", "text", 0),
    ("proftpd\\.conf", "apachestyle", 1),
    ("httpd\\.conf", "apache", 1),
    ("srm\\.conf", "apache", 1),
    ("access\\.conf", "apache", 1),
    ("apache\\.conf", "apache", 1),
    ("\\.htaccess$", "apache", 1),
    (".*enlightenment/.*\\.cfg$", "c", 0),
    (".*Eterm/.*\\.cfg$", "eterm", 0),
    ("lynx\\.cfg$", "lynx", 1),
    (".*baseq[2-3]/.*\\.cfg$", "quake", 0),
    (".*id1/.*\\.cfg$", "quake", 0),
    (".*quake[1-3]/.*\\.cfg$", "quake", 0),
    ("crontab$", "crontab", 1),
    ]
# Default detection by the name of the interpreter in the first line of a
# script: (regexp, filetype) pairs.  ft_detect() matches the regexp against
# the text that follows "#!".
# _add_scriptlist() inserts every rule at the front of the rule list, thus
# rules further down here are tried first.
_def_script_list = [
    (".*\\bpython", "python"),
    (".*\\bperl", "perl"),
    (".*\\bphp", "php"),
    (".*\\bruby", "ruby"),
    (".*\\bbc\\b", "bc"),
    (".*\\bsed\\b", "sed"),
    (".*\\bocaml", "ocaml"),
    (".*awk\\b", "awk"),
    (".*wml\\b", "wml"),
    (".*\\bksh\\b", "sh"),
    (".*\\bsh\\b", "sh"),
    (".*\\bbash", "sh"),
    (".*csh\\b", "csh"),
    (".*\\bzsh\\b", "zsh"),
    (".*\\btclsh\\b", "tcl"),
    (".*\\bwish\\b", "tcl"),
    (".*\\bexpectk\\b", "tcl"),
    (".*\\bitclsh\\b", "tcl"),
    (".*\\bitwish\\b", "tcl"),
    (".*\\bexpect\\b", "expect"),
    (".*\\bgnuplot\\b", "gnuplot"),
    (".*make\\b", "make"),
    ]
# Default detection with Python code: (suffixes, after, code) tuples.
# "suffixes" is a comma separated list of suffixes for which the code is
# used, empty for all files.  When "after" is zero the code is used before
# the suffix, regexp and script rules, otherwise after them.
# The code is executed by _exec_py() with these names available: "fname",
# "fname_base", "ignore" and "ft_detect".  It sets "type" to the detected
# filetype or leaves it unset.  An IOError (e.g., the file cannot be opened)
# is ignored by _exec_py().
# Every code block must be indented, _Ft_py.compile() puts "if 1:" before it.
# Backslashes are doubled twice: once for this string and once for the string
# literal in the executed code.
_def_python_list = [
    ("am", 0,
"""    # Use Python to avoid the .am suffix is recognized
    if string.lower(fname_base) == "makefile.am":
        type = "automake"
"""),
    ("bas,frm", 0,
"""    if string.lower(fname[-3:]) == "frm":
        type = "form"
    else:
        type = "basic"
    f = open(fname)
    l = ''
    try:
        for i in xrange(1,5):
            l = l + f.readline()
    except:
        pass
    f.close()
    if re.search("VB_Name|Begin VB\\\\.(Form|MDIForm|UserControl)", l, re.I):
        type = "vb"
"""),
    ("ch", 0,
"""    type = "ch"
    f = open(fname)
    try:
        for i in xrange(1,10):
            if f.readline()[0] == '@':
                type = "change"
                break
    except:
        pass
    f.close()
"""),
    ("e,E", 0,
"""    type = "eiffel"
    f = open(fname)
    try:
        for i in xrange(1,100):
            if cre_match("\\\\s*(<'|'>)\\\\s*$", f.readline()):
                type = "specman"
                break
    except:
        pass
    f.close()
"""),
    ("ent", 0,
"""    type = "dtd"
    f = open(fname)
    try:
        for i in xrange(1,6):
            l = f.readline()
            if cre_match("\\\\s*[#{]", l):
                type = "cl"
                break
            if not cre_match("\\\\s*$", l):
                break
    except:
        pass
    f.close()
"""),
    ("rul", 0,
"""    type = "diva"
    f = open(fname)
    try:
        for i in xrange(1,6):
            if "InstallShield" in f.readline():
                type = "ishd"
                break
    except:
        pass
    f.close()
"""),
    ("com", 0,
"""    type = "dcl"
    f = open(fname)
    try:
        l1 = f.readline() + f.readline()
        l2 = f.readline() + f.readline()
        if (cre_search("\\\\$ORIGIN|\\\\$TTL|IN\\\\s*SOA", l1)
                or cre_search("BIND.*named", l1 + l2)):
            type = "dns"
    except:
        pass
    f.close()
"""),
    ("in", 0,
"""    # Use Python to avoid the .in suffix is recognized
    if fname_base == "configure.in":
        type = "config"
"""),
    ("m", 0,
"""    type = "matlab"
    f = open(fname)
    try:
        for i in xrange(1,10):
            l = f.readline()
            if cre_match("\\\\s*#(include|import)", l):
                type = "objc"
                break
            if cre_match("\\\\s*%", l):
                break
            if cre_match("\\\\s*\\\\(\\\\*", l):
                type = "mma"
                break
    except:
        pass
    f.close()
"""),
    ("mod", 0,
"""    type = "modsim3"
    f = open(fname)
    try:
        if cre_search("\\\\bmodule\\\\b", f.readline()):
            type = "lprolog"
    except:
        pass
    f.close()
"""),
    ("1,2,3,4,5,6,7,8,9,t,ms", 0,
"""    f = open(fname)
    found = 0
    try:
        for i in xrange(1,5):
            l = f.readline()
            if not l:
                break
            if l[0] == '.':
                type = "nroff"
                found = 1
                break
    except:
        pass
    f.close()
    if not found:
        if fname[-1] == 't':
            type = "tads"
        elif fname[-1] == 's':
            type = "xmath"
"""),
    ("pl", 0,
"""    type = "perl"
    f = open(fname)
    l = ''
    try:
        # Find the first non-blank line; stop at the end of the file.
        while 1:
            l = f.readline()
            if not l or l.strip():
                break
    except:
        pass
    f.close()
    if (cre_search("\\\\bprolog\\\\b|:-", l)
            or cre_match("\\\\s*(%+(\\\\s|$)|/\\\\*)", l)):
        type = "prolog"
"""),
    ("pm", 0,
"""    type = "perl"
    f = open(fname)
    l = ''
    try:
        l = f.readline()
    except:
        pass
    f.close()
    if cre_search("XPM2", l):
        type = "xpm2"
    elif cre_search("XPM", l):
        type = "xpm"
"""),
    ("inc", 0,
"""    type = "php"
    f = open(fname)
    l = ''
    try:
        for i in xrange(1,3):
            l = l + f.readline()
    except:
        pass
    f.close()
    if cre_search("perlscript", l):
        type = "aspperl"
    elif cre_search("<%", l):
        type = "aspvbs"
    elif cre_search("<\\\\?", l):
        type = "php"
    else:
        type = "asm"    # could also be "pov", how to check?
"""),
    ("w", 0,
"""    type = "cweb"
    f = open(fname)
    try:
        if cre_search("&ANALYZE", f.readline()):
            type = "progress"
        else:
            f.readline()
            if cre_search("&GLOBAL-DEFINE", f.readline()):
                type = "progress"
    except:
        pass
    f.close()
"""),
    ("i", 0,
"""    type = "asm"
    f = open(fname)
    found = 0
    try:
        for i in xrange(1,10):
            l = f.readline()
            if l[0] == '*' or cre_match("\\\\s*;", l):
                found = 1
                break
            if not cre_match("\\\\s*$", l) or cre_match("/\\\\*", l):
                break
    except:
        pass
    f.close()
    if not found:
        type = "progress"
"""),
    ("p", 0,
"""    type = "pascal"
    f = open(fname)
    found = 0
    try:
        for i in xrange(1,10):
            l = f.readline()
            if cre_match("\\\\s*((program|procedure|function|const|type|var)\\\\b|{)", l):
                found = 1
                break
            if not cre_match("\\\\s*$", l) or cre_match("/\\\\*", l):
                break
    except:
        pass
    f.close()
    if not found:
        type = "progress"
"""),
    ("reg", 0,
"""    f = open(fname)
    try:
        if cre_match("REGEDIT[0-9]*\\\\s*$", f.readline()):
            type = "registry"
    except:
        pass
    f.close()
"""),
    ("r", 0,
"""    type = "rexx"
    f = open(fname)
    try:
        if cre_match("REBOL", f.readline()):
            type = "rebol"
    except:
        pass
    f.close()
"""),
    ("decl,dcl,dec", 0,
"""    f = open(fname)
    try:
        l = f.readline() + f.readline() + f.readline()
        if cre_match("<!SGML", l):
            type = "sgmldecl"
    except:
        pass
    f.close()
"""),
    ("smil", 0,
"""    type = "smil"
    f = open(fname)
    try:
        if cre_search("<\\\\?\\\\s*xml.*\\\\?>", f.readline()):
            type = "xml"
    except:
        pass
    f.close()
"""),
    ("smi", 0,
"""    type = "mib"
    f = open(fname)
    try:
        if cre_search("\\\\bsmil\\\\b", f.readline()):
            type = "smil"
    except:
        pass
    f.close()
"""),
    ("web", 0,
"""    type = "winbatch"
    f = open(fname)
    try:
        for i in xrange(0,5):
            if f.readline()[0] == '%':
                type = "web"
                break
    except:
        pass
    f.close()
"""),
    ("xpm", 0,
"""    type = "xpm"
    f = open(fname)
    try:
        if cre_search("XPM2", f.readline()):
            type = "xpm2"
    except:
        pass
    f.close()
"""),
    ("xml", 0,
"""    type = "xml"
"""),
    ("", 0,
"""    while 1:
        if fname == "INDEX" or fname == "INFO":
            f = open(fname)
            try:
                if cre_match("\\\\s*(distribution|installed_software|root|bundle|product)\\\\s*$", f.readline()):
                    type = "psf"
                    f.close()
                    break
            except:
                pass
            f.close()
        if "jarg" in fname:
            f = open(fname)
            try:
                for i in xrange(0,5):
                    if re.search("THIS IS THE JARGON FILE", f.readline(), re.I):
                        type = "jargon"
                        break
            except:
                pass
            f.close()
        break
"""),
    ("", 1,
"""    if ignore and fname[-1] == '~':
        type = ft_detect(fname[:-1], 1)
"""),
    ("", 1,
"""    f = open(fname)
    line1 = f.readline()
    lines = ['', line1, '', '', '', '']
    for i in xrange(2, 6):
        try:
            lines[i] = f.readline()
        except:
            break
    if line1[:2] == ':\\n':
        type = "sh"
    elif cre_match("#(compdef|autoload)\\\\b", line1):
        type = "zsh"
    elif cre_match("From [a-zA-Z][a-zA-Z_0-9\\\\.=-]*(@[^ ]*)? .*[12][09]\\\\d\\\\d$", line1):
        type = "mail"
    elif cre_match("<[%&].*>", line1):
        type = "mason"
    elif cre_match('" *[vV]im$', line1):
        type = "vim"
    elif cre_match("\\\\*\\\\* LambdaMOO Database, Format Version", line1):
        type = "moo"
    elif (cre_match("diff\\\\b|Only in |\\\\d+(,\\\\d+)?[cda]\\\\d+\\\\b|# It was generated by makepatch |Index:\\\\s+\\\\S+$|==== //\\\\S+#\\\\d+", line1)
            or (cre_match("--- ", line1) and cre_match("\\\\+\\\\+\\\\+ ", lines[2]))
            or (cre_match("\\\\*\\\\*\\\\* ", line1) and cre_match("--- ", lines[2]))):
        type = "diff"
    elif cre_match("%!\\\\s*PS", line1):
        type = "postscript"
    elif (cre_match("\\\\s*dnl\\\\b", line1)
            or cre_match("\\\\s*dnl\\\\b", lines[2])
            or cre_match("\\\\s*dnl\\\\b", lines[3])
            or cre_match("\\\\s*dnl\\\\b", lines[4])
            or cre_match("\\\\s*dnl\\\\b", lines[5])):
        type = "m4"
    elif re.match(" *proc[nd] *$", line1, re.I):
        type = "sicad"
    elif cre_match("\\\\*\\\\*\\\\* Purify", line1):
        type = "purifylog"
    elif cre_search("<\\\\?\\\\s*xml.*\\\\?>", line1):
        type = "xml"
    elif cre_match("[0-9a-fA-F]{7}: [0-9a-fA-F]{2} [0-9a-fA-F]{2} [0-9a-fA-F]{2} [0-9a-fA-F]{2} ", line1):
        type = "xxd"
    elif cre_match("RCS file:", line1) or cre_match("RCS file:", lines[2]):
        type = "rcslog"
    elif cre_match("CVS:", lines[2]):
        type = "cvs"
    elif cre_match("SEND-PR:", line1):
        type = "sendpr"
    elif cre_match("SNNS network definition file", line1):
        type = "snnsnet"
    elif cre_match("SNNS pattern definition file", line1):
        type = "snnspat"
    elif cre_match("SNNS result file", line1):
        type = "snnsres"
    elif (cre_match("%.*?[Vv]irata", line1)
            or cre_match("%.*?[Vv]irata", lines[2])
            or cre_match("%.*?[Vv]irata", lines[3])
            or cre_match("%.*?[Vv]irata", lines[4])
            or cre_match("%.*?[Vv]irata", lines[5])):
        type = "virata"
    elif cre_match("[0-9]* *execve\\\\(", line1):
        type = "strace"
    elif (cre_search("K & K Associates", lines[4])
            or cre_search("TAK 2000", lines[2])):
        type = "takout"
    elif cre_search("S Y S T E M S I M P R O V E D ", lines[3]):
        type = "sindaout"
    # takcmp and sindacmp skipped
    elif (cre_search("\\\\$ORIGIN|\\\\$TTL|IN\\\\s*SOA", line1 + lines[2])
            or cre_search("BIND.*named", line1 + lines[2] + lines[3] + lines[4])):
        type = "dns"
    elif ((cre_search("\\\\|\\\\*{1,80}", line1)
                and cre_search("VRC ", lines[2]))
            or (cre_search("\\\\|\\\\*{1,80}", lines[2])
                and cre_search("VRC ", lines[3]))):
        type = "baan"
    elif cre_match("==\\\\d+== valgrind", line1):
        type = "valgrind"
    else:
        line = None
        for i in xrange(1,6):
            if not cre_match("\\\\? ", lines[i]):
                line = lines[i]
                break
        if not line:
            while 1:
                try:
                    l = f.readline()
                    if not cre_match("\\\\? ", l):
                        line = l
                        break
                except:
                    break
        if line and cre_match("Index:\\\\s+\\\\S+$", line):
            type = "diff"
    f.close()
"""),
    ("mas,master", 1,
"""    type = "master"
"""),
    ("m4", 1,
"""    type = "m4"
"""),
    ("me", 1,
"""    type = "nroff"
"""),
    ("txt", 1,
"""    type = "text"
"""),
    ("inp", 1,
"""    f = open(fname)
    try:
        l = f.readline()
        if l[0] == '*':
            type = "abaqus"
        else:
            for i in xrange(1, 500):
                if len(l) >= 19 and string.lower(l[:19]) == "header surface data":
                    type = "trasys"
                    break
                l = f.readline()
    except:
        pass
    f.close()
"""),
    ("asp", 1,
"""    type = "aspvbs"
    f = open(fname)
    l = ''
    try:
        l = f.readline()
        l = l + f.readline()
        l = l + f.readline()
    except:
        pass
    if "perlscript" in l.lower():
        type = "aspperl"
    f.close()
"""),
    ("cfg", 1,
"""    type = "cfg"
"""),
    ]
# Rules with upper case suffixes, in the format that ft_add_rules() reads.
# Only defined on a posix system; __init__() adds them under the same
# condition.
if os.name == "posix":
    _case_detect_list = """
suffix L lisp
suffix C cpp
suffix H cpp
"""
# The detection tables.  They are filled by __init__().
_py_list_before = []    # _Ft_py rules used before the other rules
_suffix_dict = {}       # key: suffix, value: filetype
_regexp_list = []       # _Ft_re rules matched with the file name
_script_list = []       # _Ft_re rules matched with the "#!" line
_py_list_after = []     # _Ft_py rules used after the other rules
# Results of ft_detect(): key is the file name, value the detected type.
# Index 0 is used when "ignore" is zero, index 1 when it is non-zero.
_cache_dict = [{}, {}]
_filetype_dict = {}     # the keys are the known filetype names
# Filetype names that __init__() marks as known in addition to the names
# that the rule tables add.  Written as one white-separated string to keep it
# compact; the order is that of the resulting list.
_filetype_pre_list = """
    abaqus asm aspperl aspvbs automake baan basic cfg ch change cl config
    cvs cweb dcl diff diva dns dtd eiffel form ishd jargon lprolog m4 mail
    mason master matlab mib mma modsim3 moo nroff objc pascal perl php
    postscript progress prolog psf purifylog rcslog rebol registry rexx
    sendpr sgmldecl sh sicad sindaout smil snnsnet snnspat snnsres specman
    strace tads takout text trasys valgrind vb vim virata web winbatch
    xmath xml xpm xpm2 xxd zsh
    libobject ltobject
    """.split()

# Becomes 1 once __init__() has filled the tables.
_did_init = 0
def __init__():
    """Fill the detection tables with the default rules.
       Only the first call does the work, later calls return at once.
       Also loads the *.afd files found in the "afd" subdirectory of each
       directory returned by default_dirs()."""
    global _suffix_dict, _regexp_list, _script_list
    global _py_list_before, _py_list_after
    global _did_init
    global _filetype_dict

    if _did_init:
        return
    # Set the flag before doing the work: ft_add_rules() calls us again.
    _did_init = 1

    # Start with empty tables.
    _suffix_dict = {}
    _filetype_dict = {}
    _regexp_list = []
    _script_list = []
    _py_list_before = []
    _py_list_after = []

    # The built-in rules.
    _add_suffixlist(_def_suffix_list)
    _add_regexplist(_def_regexp_list)
    _add_scriptlist(_def_script_list)
    _add_pythonlist(_def_python_list)
    if os.name == "posix":
        ft_add_rules(_case_detect_list, 1)

    # Rules from files in the default directories.
    for rule_root in default_dirs({}):
        ft_check_dir(os.path.join(rule_root, "afd"))

    # Types that may not have been mentioned in any rule table.
    for known in _filetype_pre_list:
        _filetype_dict[known] = 1
class DetectError(Exception):
    """Error raised when reading or applying filetype detection rules fails.
       "args" is the error message; str() of the exception returns it."""
    def __init__(self, args = None):
        # Hand the message to Exception instead of assigning it to self.args:
        # that assignment turns a string into a tuple of its characters and
        # fails for None.
        if args is None:
            Exception.__init__(self)
        else:
            Exception.__init__(self, args)
def ft_known(type):
    """Return True when "type" is a known filetype.
       The detection tables are initialized first when needed."""
    __init__()
    # "in" instead of has_key(): the latter no longer exists in Python 3.
    return type in _filetype_dict
def ft_declare(type):
    """Declare "type" to be a known filetype.
       The detection tables are initialized first when needed."""
    __init__()
    _filetype_dict[type] = 1
def _ft_report_error(recdict, text):
    """Give error message "text": print it when run as a program, otherwise
       pass it to msg_error() together with "recdict"."""
    if _run_as_program:
        print(text)
    else:
        # Imported here, as the original code did, and only when needed.
        from Message import msg_error
        msg_error(recdict, text)

def ft_check_dir(dir, errmsg = 0, recdict = None):
    """Check directory "dir" for *.afd files and load them.
       When "errmsg" is non-zero give an error message when the directory
       doesn't exist.
       A DetectError for one file is reported and the other files are still
       loaded."""
    if os.path.isdir(dir):
        # glob() returns the names in arbitrary order; sort them to load the
        # rules in the same order every time.
        for rule_file in sorted(glob.glob(os.path.join(dir, "*.afd"))):
            try:
                ft_read_file(rule_file, recdict)
            except DetectError as e:
                _ft_report_error(recdict, str(e))
    elif errmsg:
        _ft_report_error(recdict, _('Directory does not exist: "%s"') % dir)
def ft_read_file(fname, recdict = None):
    """Read file "fname" for file type detection rules.
       Raises DetectError when the file cannot be opened or read, and
       whatever ft_add_rules() raises for an error in the rules."""
    try:
        fd = open(fname)
    except IOError as e:
        raise DetectError((_('Cannot open "%s": ') % fname) + str(e))
    try:
        s = fd.read()
    except IOError as e:
        raise DetectError((_('Cannot read "%s": ') % fname) + str(e))
    finally:
        # Also close the file when reading failed.
        fd.close()
    ft_add_rules(s, 1, recdict)
def ft_add_rules(dtstr, recipe_line_nr, recdict = None):
    """Add file type detection rules from string "dtstr".
       "recipe_line_nr" is the first line number in a recipe, zero when not
       reading a recipe.
       Each rule is a line that starts with "declare", "suffix", "regexp",
       "script" or "python".  Empty lines and lines starting with '#' are
       skipped.  A "python" line is followed by the lines with Python code,
       which must be indented more than the "python" line.
       Raises DetectError for a line that cannot be used."""
    __init__()

    lines = dtstr.split('\n')
    line_count = len(lines)
    line_idx = 0
    while line_idx < line_count:
        line = lines[line_idx]

        # Separate the rule type from the rest; skip empty and comment lines.
        items = line.split(None, 1)
        if len(items) < 1 or items[0][0] == '#':
            line_idx = line_idx + 1
            continue
        itype = items[0]
        if len(items) < 2:
            rline = ''
        else:
            rline = items[1]
        rline_len = len(rline)

        # Isolate the first argument.  It may be in quotes, so that it can
        # contain white space.  "n" becomes the index just after it.
        if rline_len > 0:
            if rline[0] == '"' or rline[0] == "'":
                astart = 1
                aend = rline.find(rline[0], 1)
                if aend < 0:
                    raise DetectError(_('Missing quote in line %d: "%s"')
                                        % (line_idx + recipe_line_nr, line))
                n = aend + 1
            else:
                astart = 0
                aend = 0
                while (aend < rline_len and rline[aend] != ' '
                                              and rline[aend] != '\t'):
                    aend = aend + 1
                n = aend
            arg1 = rline[astart:aend]
        else:
            arg1 = ''
            n = rline_len

        # Up to three more arguments, separated by white space.  Missing
        # ones are empty, further ones are ignored.
        arg2, arg3, arg4 = (rline[n:].split() + ['', '', ''])[:3]

        if ((itype in ["suffix", "regexp", "script"] and not arg2)
                or (itype == "declare" and not arg1)):
            raise DetectError(_('Missing argument in line %d: "%s"')
                                        % (line_idx + recipe_line_nr, line))

        if itype == "declare":
            _filetype_dict[arg1] = 1
        elif itype == "suffix":
            _add_suffix(arg1, arg2)
        elif itype == "regexp":
            _add_regexp(arg1, arg2, arg3 == "tail" or arg4 == "tail",
                                     arg3 == "append" or arg4 == "append")
        elif itype == "script":
            _add_script(arg1, arg2, arg3 and arg3 == "append")
        elif itype == "python":
            # The arguments are "append", "after" and a suffix list, in any
            # order, each at most once.
            append = 0
            after = 0
            suffix = None
            for arg in [arg1, arg2, arg3]:
                if arg:
                    if arg == "append":
                        append = 1
                    elif arg == "after":
                        after = 1
                    elif not suffix:
                        suffix = arg
                    else:
                        raise DetectError(
                                _('Illegal argument in line %d: "%s"')
                                        % (line_idx + recipe_line_nr, line))

            # Collect the Python code: all following lines up to the first
            # line that is not indented more than the "python" line, not
            # counting empty and comment lines.
            start_indent = get_indent(line)
            line_idx = line_idx + 1
            start_line_idx = line_idx + recipe_line_nr
            cmds = ""
            while line_idx < line_count:
                line = lines[line_idx]
                if get_indent(line) <= start_indent:
                    i = skip_white(line, 0)
                    if i < len(line) and line[i] != '#':
                        # This line starts the next rule; it is handled in
                        # the next round of the outer loop.
                        line_idx = line_idx - 1
                        break
                cmds = cmds + line + '\n'
                line_idx = line_idx + 1
            if not cmds:
                raise DetectError(_('Python commands missing in line %d')
                                              % (line_idx + recipe_line_nr))
            _add_python(cmds,
                        _("filetype detection; python code at line %d: ")
                                % start_line_idx, after, append, suffix)
        else:
            raise DetectError(
                    _('Illegal item "%s" in argument to ft_add_rules(): %s')
                                                            % (itype, line))

        line_idx = line_idx + 1
class _Ft_re:
    """Class used to store pairs of RE and file type."""
    def __init__(self, regexp, type, tail):
        self.re = regexp        # the regexp as a string
        self.type = type        # the filetype it detects
        self.tail = tail        # non-zero: match with the tail of the name
        self.cre = None         # the compiled regexp, set by comp()

    def comp(self):
        """Compile the regexp and store the result in "cre", unless this
           was already done.
           Raises DetectError when the regexp cannot be compiled."""
        if self.cre is None:
            try:
                self.cre = re.compile(self.re)
            # The error raised for a bad regexp is not a StandardError (and
            # that class is gone in Python 3), thus catch Exception.
            except Exception as e:
                raise DetectError(
                        (_('Error in filetype detection regexp "%s": ')
                                                    % self.re) + str(e))
class _Ft_py:
    """Class used to store Python code for detecting a file type."""
    def __init__(self, code, suffix, error_msg):
        self.code = code            # the Python code as a string
        self.ccode = None           # the compiled code, set by compile()
        self.suffix = suffix        # list of suffixes the code is used for
        self.error_msg = error_msg  # text to start an error message with

    def compile(self):
        """Compile the code and store the result in "ccode", unless this was
           already done.
           Raises DetectError when the code cannot be compiled."""
        if not self.ccode:
            # Indented code is only valid inside a block, put it in one.
            if self.code[0] == ' ' or self.code[0] == '\t':
                tcode = "if 1:\n" + self.code
            else:
                tcode = self.code
            try:
                self.ccode = compile(tcode, 'filetype detection rules', 'exec')
            # StandardError does not exist in Python 3, catch Exception.
            except Exception as e:
                raise DetectError((_('Error in Python code (%s): ')
                                              % self.error_msg) + str(e))
def _add_suffix(suf, type):
    """Add detection of "type" by file name extension "suf".
       When "type" is "ignore" it means the suffix is removed and further
       detection done on the rest.
       When "type" is "remove" an existing detection for "suf" is removed;
       nothing happens when there is none."""
    if type == 'remove':
        # "in" instead of has_key(): the latter no longer exists in Python 3.
        if suf in _suffix_dict:
            del _suffix_dict[suf]
    else:
        _suffix_dict[suf] = type
        _filetype_dict[type] = 1
def _add_suffixlist(list):
    """Add a suffix rule for every (suffix, filetype) pair in "list".
       A filetype of "remove" gets no special meaning here."""
    for pair in list:
        suffix, ftype = pair
        _suffix_dict[suffix] = ftype
        _filetype_dict[ftype] = 1
def _add_regexp(regexp, type, tail, append):
    """Add detection of "type" by matching the file name with Python regular
       expression "regexp".
       When "tail" is non-zero the match is done with the tail of the name.
       When append is non-zero, add to the end of the regexp rules, otherwise
       to the start.
       When "type" is "remove" all existing detections for "regexp" are
       removed."""
    if type == 'remove':
        # Make a new list with the rules to keep and put it in the existing
        # list object.  Removing items in a loop over the same list would
        # skip the item after each removed one.
        _regexp_list[:] = [r for r in _regexp_list if r.re != regexp]
    else:
        f = _Ft_re(regexp, type, tail)
        if append:
            _regexp_list.append(f)
        else:
            _regexp_list.insert(0, f)
        _filetype_dict[type] = 1
def _add_regexplist(list):
    """Add a regexp rule for every (regexp, filetype, tail) tuple in "list".
       Each rule is put at the start of the rules, thus the last one in
       "list" ends up first."""
    for rule in list:
        pattern, ftype, use_tail = rule
        _add_regexp(pattern, ftype, use_tail, 0)
def _add_script(regexp, type, append):
    """Add detection of "type" by matching the script name in the first line
       of the file with Python regular expression "regexp".
       When append is non-zero, add to the end of the script rules, otherwise
       to the start.
       When "type" is "remove" all existing detections for "regexp" are
       removed."""
    if type == 'remove':
        # Make a new list with the rules to keep and put it in the existing
        # list object.  Removing items in a loop over the same list would
        # skip the item after each removed one.
        _script_list[:] = [r for r in _script_list if r.re != regexp]
    else:
        f = _Ft_re(regexp, type, 0)
        _filetype_dict[type] = 1
        if append:
            _script_list.append(f)
        else:
            _script_list.insert(0, f)
def _add_scriptlist(list):
    """Add a script rule for every (regexp, filetype) pair in "list".
       Each rule is put at the start of the rules, thus the last one in
       "list" ends up first."""
    for rule in list:
        pattern, ftype = rule
        _add_script(pattern, ftype, 0)
def _add_python(code, error_msg, after, append, suffix):
    """Add detection of a filetype by using Python code "code".
       Each line in "code" must end in a '\n'.
       "error_msg" is printed when executing the code results in an error.
       When "after" is non-zero use this rule after suffix, regexp and script
       rules.
       When append is non-zero, add to the end of the python rules.
       "suffix" is a comma separated list of suffixes for which the code is
       to be used; when empty or None it is used for all files."""
    if suffix:
        # A string method instead of string.split(): the latter does not
        # exist in Python 3.
        l = suffix.split(',')
    else:
        l = []
    p = _Ft_py(code, l, error_msg)

    if after:
        ilist = _py_list_after
    else:
        ilist = _py_list_before
    if append:
        ilist.append(p)
    else:
        ilist.insert(0, p)
def _add_pythonlist(list):
    """Add a python rule for every (suffixes, after, code) tuple in "list".
       Each rule is put at the start of its list of rules and gets the error
       message "default rule"."""
    error_msg = _("default rule")
    for rule in list:
        suffixes, use_after, code = rule
        _add_python(code, error_msg, use_after, 0, suffixes)
def _exec_py(fname, item, ignore):
    """Execute the code defined with _add_python().
       "item" is the _Ft_py object with the code.  The code is executed in
       "exec_recdict", in which "fname", "fname_base", "ft_detect" and
       "ignore" are set.
       Returns the value the code has set "type" to, None when it was not
       set.
       An IOError in the code is ignored.  Raises DetectError for another
       error and when the code cannot be compiled."""
    exec_recdict["fname"] = fname
    exec_recdict["fname_base"] = os.path.basename(fname)
    exec_recdict["ft_detect"] = ft_detect
    exec_recdict["ignore"] = ignore
    # Remove the result of the previous rule.
    if "type" in exec_recdict:
        del exec_recdict["type"]

    item.compile()
    try:
        # The function form of exec is accepted by Python 2 as well.
        exec(item.ccode, exec_recdict, exec_recdict)
    except IOError:
        # The file cannot be read; use what the code has set so far.
        pass
    # StandardError does not exist in Python 3, catch Exception.
    except Exception as e:
        raise DetectError(_(item.error_msg) + str(e))

    if "type" in exec_recdict:
        return exec_recdict["type"]
    return None
def ft_detect(fname, ignore = 0, recdict = None):
    """Detect the file type for file "fname".
       When "ignore" is non-zero a suffix with the type "ignore" is removed
       and the detection done on what remains; otherwise "ignore" is
       returned for it.
       The rules are used in this order: Python rules not marked "after",
       suffix rules, regexp rules, script rules, Python rules marked
       "after".  The result is cached.
       Returns the type as a string or None.
       May raise DetectError for an error in a rule."""
    cache = _cache_dict[ignore]
    if fname in cache:
        return cache[fname]

    if os.path.isdir(fname):
        cache[fname] = "directory"
        return "directory"

    i18n_init()
    __init__()

    # From here on the folded name is used, also as the key in the cache.
    fname = fname_fold(fname)

    # Python rules that come first.  The suffix is taken from the full name.
    i = fname.rfind(".")
    if i > 0:
        suffix = fname[i + 1:]
    else:
        suffix = ''
    for p in _py_list_before:
        if not p.suffix or suffix in p.suffix:
            atype = _exec_py(fname, p, ignore)
            if atype:
                cache[fname] = atype
                return atype

    # Suffix rules.  Try the longest suffix first: "tar.gz" before "gz".
    bn = os.path.basename(fname)
    i = bn.find(".")
    while i > 0 and i + 1 < len(bn):
        suf = bn[i + 1:]
        if suf in _suffix_dict:
            ft = _suffix_dict[suf]
            if ft == "ignore" and ignore:
                # Detect the type of the name with ".suf" removed.
                ft = ft_detect(fname[:-(len(bn[i:]))], 1, recdict)
            cache[fname] = ft
            return ft
        i = bn.find(".", i + 1)

    # Regexp rules.
    for r in _regexp_list:
        if not r.cre:
            r.comp()
        if r.tail:
            name = bn
        else:
            name = fname
        if r.cre.match(name):
            cache[fname] = r.type
            return r.type

    # Script rules: need a first line that starts with "#!".  A file that
    # cannot be read is silently skipped.
    try:
        f = open(fname)
        try:
            line = f.readline()
        finally:
            f.close()
    except Exception:
        pass
    else:
        if len(line) > 2 and line[:2] == "#!":
            text = line[2:]
            for r in _script_list:
                if not r.cre:
                    r.comp()
                if r.cre.match(text):
                    cache[fname] = r.type
                    return r.type

    # Python rules that come last.  The suffix is taken from the tail.
    i = bn.rfind(".")
    if i > 0:
        suffix = bn[i + 1:]
    else:
        suffix = ''
    for p in _py_list_after:
        if not p.suffix or suffix in p.suffix:
            atype = _exec_py(fname, p, ignore)
            if atype:
                cache[fname] = atype
                return atype

    cache[fname] = None
    return None
def filetype_root(ft):
    """When "ft" contains an underscore, return the part before the first
       underscore.  This is the basic filetype for user-defined filetypes.
       Return None otherwise, also when "ft" starts with an underscore."""
    # A string method instead of string.find(): the latter does not exist
    # in Python 3.
    i = ft.find('_')
    if i > 0:
        return ft[:i]
    return None
# When executed as a program: detect the type of the file given as argument
# and print it.
# Usage: [-I ruledir] [-f rulefile] filename
# "-I" and "-f" may be given several times, the directories and files are
# used in the order given.  The argument may be attached ("-Idir") or be the
# next argument ("-I dir").
if __name__ == '__main__':
    i18n_init()

    items = []          # {"dir": name} and {"file": name}, in order
    checkfile = None    # the file to detect the type of
    _run_as_program = 1
    next_is_dir = 0
    next_is_file = 0
    for arg in sys.argv[1:]:
        # append() is needed here: extend() with a dictionary would add its
        # keys, the strings "dir" and "file".
        if next_is_dir:
            items.append({"dir" : arg})
            next_is_dir = 0
        elif next_is_file:
            items.append({"file" : arg})
            next_is_file = 0
        elif arg[:2] == "-I":
            if len(arg) > 2:
                items.append({"dir" : arg[2:]})
            else:
                next_is_dir = 1
        elif arg[:2] == "-f":
            if len(arg) > 2:
                items.append({"file" : arg[2:]})
            else:
                next_is_file = 1
        else:
            if checkfile:
                print(_("Can only check one file"))
                sys.exit(1)
            checkfile = arg

    if next_is_dir:
        print(_("-I argument must be followed by a directory name"))
        sys.exit(1)
    if next_is_file:
        print(_("-f argument must be followed by a file name"))
        sys.exit(1)
    if not checkfile:
        print(_("Usage: %s [-I ruledir] [-f rulefile] filename") % sys.argv[0])
        sys.exit(1)

    # Load the default rules first, then the ones from the arguments.
    __init__()
    for item in items:
        if "dir" in item:
            ft_check_dir(item["dir"])
        else:
            try:
                ft_read_file(item["file"])
            except DetectError as e:
                print(e)

    # Use the file name found above; sys.argv[1] may be an option.
    try:
        detected = ft_detect(checkfile)
        if detected == "ignore":
            print("%s (ignored suffix)" % ft_detect(checkfile, 1))
        else:
            print(detected)
    except DetectError as e:
        sys.stderr.write("Detection error: " + str(e) + "\n")