Hide
Need a quick chart or graph for your blog? Try our reStructured Text renderer. Join Siafoo Now or Learn More

Framework to get Filetype-Specific Metadata -- Including ID3 Tags Atom Feed 0

In Brief Provides a framework to get filetype-specific metadata. Included is an example to parse ID3 tags (version 1) from MP3 files.... more
# 's
 1"""Framework for getting filetype-specific metadata.
2
3Instantiate appropriate class with filename. Returned object acts like a
4dictionary, with key-value pairs for each piece of metadata.
5 import fileinfo
6 info = fileinfo.MP3FileInfo("/music/ap/mahadeva.mp3")
7 print "\\n".join(["%s=%s" % (k, v) for k, v in info.items()])
8
9Or use listDirectory function to get info on all files in a directory.
10 for info in fileinfo.listDirectory("/music/ap/", [".mp3"]):
11 ...
12
13Framework can be extended by adding classes for particular file types, e.g.
14HTMLFileInfo, MPGFileInfo, DOCFileInfo. Each class is completely responsible for
15parsing its files appropriately; see MP3FileInfo for example.
16
17This program is part of "Dive Into Python", a free Python book for
18experienced programmers. Visit http://diveintopython.org/ for the
19latest version.
20"""
21
22__author__ = "Mark Pilgrim (mark@diveintopython.org)"
23__version__ = "$Revision: 1.2 $"
24__date__ = "$Date: 2004/05/05 21:57:19 $"
25__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
26__license__ = "Python"
27
28import os
29import sys
30
def stripnulls(data):
    """Return *data* with NUL characters turned into spaces and the ends trimmed.

    Every NUL is replaced by a space first, then leading and trailing
    whitespace is stripped, so a NUL in the middle of the text survives as
    a space while padding at either end disappears.
    """
    spaced = data.replace("\00", " ")
    return spaced.strip()
34
class FileInfo(dict):
    """Dictionary of metadata describing a single file.

    The file name given to the constructor is stored under the "name" key
    through ordinary item assignment, so a subclass that overrides
    __setitem__ sees that assignment.
    """

    def __init__(self, filename=None):
        dict.__init__(self)
        self["name"] = filename
39
class MP3FileInfo(FileInfo):
    """Store ID3v1.0 MP3 tags.

    Assigning a non-empty value to the "name" key (which the FileInfo
    constructor does) reads the last 128 bytes of that file and, when they
    start with the "TAG" marker, fills the dictionary with the tag fields
    listed in tagDataMap.
    """

    # Field name -> (start, end, converter). start:end is used as a slice
    # of the 128-byte tag block and the converter is applied to that slice.
    # NOTE(review): no field covers byte 126 ("comment" ends at 126 and
    # "genre" starts at 127) -- confirm this is intentional.
    tagDataMap = {"title"   : (  3,  33, stripnulls),
                  "artist"  : ( 33,  63, stripnulls),
                  "album"   : ( 63,  93, stripnulls),
                  "year"    : ( 93,  97, stripnulls),
                  "comment" : ( 97, 126, stripnulls),
                  "genre"   : (127, 128, ord)}

    def __parse(self, filename):
        """Parse ID3v1.0 tags from the MP3 file *filename* into this dict.

        Existing entries are cleared first. If the file cannot be opened,
        seeked or read (IOError), the dict is simply left without tags.
        """
        self.clear()
        try:
            fsock = open(filename, "rb", 0)
            try:
                # Whence 2 = relative to end of file: the tag block is the
                # final 128 bytes.
                fsock.seek(-128, 2)
                tagdata = fsock.read(128)
            finally:
                fsock.close()
        except IOError:
            # Deliberate best effort: an unreadable or too-short file just
            # yields no tag entries.
            return
        # On Python 3 a binary read returns bytes, which never compares
        # equal to the text marker "TAG" and cannot be passed to the text
        # converters. Latin-1 maps every byte to the code point of the same
        # value, so ord() still yields the raw genre byte. On Python 2 the
        # data is already str and is left untouched.
        if not isinstance(tagdata, str):
            tagdata = tagdata.decode("latin-1")
        if tagdata[:3] == "TAG":
            for tag, (start, end, parseFunc) in self.tagDataMap.items():
                self[tag] = parseFunc(tagdata[start:end])

    def __setitem__(self, key, item):
        """Store *item* under *key*; a non-empty "name" triggers tag parsing first."""
        if key == "name" and item:
            # __parse clears the dict, so it must run before "name" is stored.
            self.__parse(item)
        FileInfo.__setitem__(self, key, item)
69
def listDirectory(directory, fileExtList):
    """Get a list of file info objects for files of particular extensions.

    Args:
        directory: path of the directory to scan; only its direct entries
            are considered.
        fileExtList: extensions to keep, each including the leading dot
            (e.g. [".mp3"]). They are compared against the extensions of
            the os.path.normcase'd entry names.

    Returns:
        A list with one object per matching entry. Each object is built
        from the class named "<EXT>FileInfo" in FileInfo's module when such
        an attribute exists, and from FileInfo otherwise.
    """
    fileList = [os.path.normcase(f) for f in os.listdir(directory)]
    fileList = [os.path.join(directory, f) for f in fileList
                if os.path.splitext(f)[1] in fileExtList]

    def getFileInfoClass(filename, module=sys.modules[FileInfo.__module__]):
        """Get the file info class matching the extension of *filename*."""
        subclass = "%sFileInfo" % os.path.splitext(filename)[1].upper()[1:]
        # getattr with a default replaces the error-prone
        # `hasattr(...) and getattr(...) or FileInfo` idiom.
        return getattr(module, subclass, FileInfo)

    return [getFileInfoClass(f)(f) for f in fileList]
80
if __name__ == "__main__":
    # Script entry point: dump the metadata of every .mp3 file in a
    # hard-coded directory, one key=value pair per line, followed by a
    # blank line after each file.
    for info in listDirectory("/music/_singles/", [".mp3"]):
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print("\n".join(["%s=%s" % (k, v) for k, v in info.items()]))
        print("")

Provides a framework to get filetype-specific metadata. Included is an example to parse ID3 tags (version 1) from MP3 files.

To parse other types of files, you just need to subclass FileInfo: see the docstrings for more info.