Rectangle 27 164

Fastest way to get a list with current directory's files - Python 3

>>> import os
>>> arr = next(os.walk('.'))[2]
>>> arr
['5bs_Turismo1.pdf', '5bs_Turismo1.pptx', 'esperienza.txt']
>>> import os
>>> path = os.getcwd()
>>> arr = []
>>> for files in next(os.walk(path))[2]:
>>>     arr.append(path + "\\" + files)
...
>>> for files in arr:
>>>     print(files)
...
F:\_moduli_economia\5bs_Turismo1.pdf
F:\_moduli_economia\5bs_Turismo1.pptx
F:\_moduli_economia\esperienza.txt

Here is a list of what I talked about in this answer:

  • 1.1 - Use of list comprehension to select only txt files
  • 1.2 - Using os.path.isfile to avoid directories in the list
  • 4.1 - python 2.7 - os.walk('.')
  • Example of use of os.walk('.') to count how many files there are in a directory and its subdirectories (for python 3.5 and 2.7)
  • Bonus: search for a type of files and copy them in a dir
>>> import os
>>> arr = os.listdir()
>>> arr
['$RECYCLE.BIN', 'work.txt', '3ebooks.txt', 'documents']
>>> arr_txt = [x for x in os.listdir() if x.endswith(".txt")]
>>> print(arr_txt)
['work.txt', '3ebooks.txt']
import os.path

listOfFiles = [f for f in os.listdir() if os.path.isfile(f)]

print(listOfFiles)

There are only files here

import pathlib

>>> flist = []
>>> for p in pathlib.Path('.').iterdir():
...  if p.is_file():
...   print(p)
...   flist.append(p)
...
error.PNG
exemaker.bat
guiprova.mp3
setup.py
speak_gui2.py
thumb.PNG

If you want to use list comprehension

>>> flist = [p for p in pathlib.Path('.').iterdir() if p.is_file()]

To include all the files in the subdirectories (in this example there are 11 files in the first directory and 3 in a subdirectory), I will use os.walk(), which works well in Python 3.5 and newer versions:

import os
x = [i[2] for i in os.walk('.')]
y=[]
for t in x:
    for f in t:
        y.append(f)
print(y)
# print y # for 2.7 uncomment this and comment the previous line
>>> import os
>>> x = next(os.walk('F://python'))[2] # for the current dir use ('.')
>>> ['calculator.bat','calculator.py']

When you use next(os.walk('.')), you get the same results as os.listdir(), but with the root as the first item of the tuple, all the folders in the second item and all the files in the third, while os.listdir() returns folders and files in the same list. In both cases (next(os.walk('.')) and os.listdir()) you only look in the current directory, leaving the subdirectories alone (you must iterate over os.walk('.') for that, as we showed before).

>>> import os
>>> x = [f.name for f in os.scandir() if f.is_file()]
>>> x
['calculator.bat','calculator.py']

Another example with scandir (a little variation from docs.python.org) This one is more efficient than os.listdir. In this case, it shows the files only in the current directory where the script is executed.

>>> import os
>>> with os.scandir() as i:
...  for entry in i:
...   if entry.is_file():
...    print(entry.name)
...
ebookmaker.py
error.PNG
exemaker.bat
guiprova.mp3
setup.py
speakgui4.py
speak_gui2.py
speak_gui3.py
thumb.PNG
>>>
>>> import os
>>> mylist = os.listdir(os.getcwd())
>>> mylist
['$RECYCLE.BIN', 'work.txt', '3ebooks.txt', 'documents']
>>> for f in os.listdir('..'):
...     print f


>>> for f in os.listdir('/'):
...     print f

It's the same as in Python 3 (except the print)

>>> x = os.listdir('F:/python')
>>> for files in x:
>>>    print files
...
$RECYCLE.BIN
work.txt
3ebooks.txt
documents

5.1 - python 2 - os.walk('.')

Let's make an example for python 2.7 with walk (same as python 3).

>>> def getAllFiles(dir):
...     """Get all the files in the dir and subdirs"""
...     allfiles = []
...     for pack in os.walk(dir):
...         for files in pack[2]:
...             if os.path.isfile(files):
...                 allfiles += [files]
...     return allfiles
...
>>> getAllFiles("F://python")
['first.py', 'Modules.txt', 'test4Console.py', 'text4Console.bat', 'tkinter001.py']

In this example, we count the number of files contained in a directory and all of its subdirectories.

import os    

def count(dir, counter=0):
    """Return a summary string with the number of files in dir and subdirs.

    Args:
        dir: Path of the directory to walk recursively.
        counter: Starting value that the number of files found is added to.

    Returns:
        A string of the form "<dir> : <total> files".
    """
    # os.walk yields (dirpath, dirnames, filenames); only filenames are counted.
    counter += sum(len(filenames) for _, _, filenames in os.walk(dir))
    return dir + " : " + str(counter) + " files"


print(count("F:\\python"))
>>> import glob
>>> glob.glob("*.txt")
['ale.txt', 'alunni2015.txt', 'assenze.text.txt', 'text2.txt', 'untitled.txt']

A little script that searches in all the subdirectories of some directories (I chose the ones whose names start with an underscore), finds all the files of a given type (pdf, pptx, txt, etc.) and copies them into a destination directory. This is useful if you have made a lot of subdirectories and you want to look at all the stuff you made (let's say presentations) in one place, without having to recall where you put this file or that one. I hope you find it helpful; I wrote it for my own purposes.

import os
import shutil
from path import path

destination = "F:\\pptx_copied"
# os.makedirs(destination)


def copyfile(dir, filetype='pptx', counter=0):
    """Search dir recursively for files ending in filetype and copy them.

    Every match is printed and copied into the module-level ``destination``
    directory. A summary is printed when at least one file was counted.

    Args:
        dir: Directory to walk recursively.
        filetype: Suffix a file name must end with to be copied.
        counter: Starting value for the number of files copied.
    """
    for root, _subdirs, names in os.walk(dir):
        for name in names:
            if name.endswith(filetype):
                # os.path.join uses the platform separator rather than a
                # hard-coded "\\", so the path is valid outside Windows too.
                fullpath = os.path.join(root, name)
                print(fullpath)
                shutil.copy(fullpath, destination)
                counter += 1
    if counter > 0:
        print("------------------------")
        print("\t==> Found in: `" + dir + "` : " + str(counter) + " files\n")


for dir in os.listdir():
    # Only entries whose name starts with `_` are searched.
    if dir.startswith('_'):
        # copyfile(dir, filetype='pdf')
        copyfile(dir, filetype='txt')
_compiti18\Compito Contabilit 1\conti.txt
_compiti18\Compito Contabilit 1\modula4.txt
_compiti18\Compito Contabilit 1\moduloa4.txt
_compiti18\ottobre\3acc\compito.txt
_compiti18\ottobre\3acc\compito1530.txt
_compiti18\ottobre\3acc\compito1530_correttore.txt
_compiti18\ottobre\3acc\compito3825.txt
_compiti18\ottobre\3acc\compito3825_correttore.txt
_compiti18\ottobre\3acc\compito6028.txt
------------------------
==> Found in: `_compiti18` : 9 files

You should include the path argument to listdir.

I agree, but I did not notice something also, that python2 requires the argument whilst python3 is optional, If you improve the answer for both python versions would be great :)

Ok, I went into Python 2 and find the differences and I edited the post.

python - How do I list all files of a directory? - Stack Overflow

python directory
Rectangle 27 154

Fastest way to get a list with current directory's files - Python 3

Here is a list of what I talked about in this answer:

  • 1.1 - Use of list comprehension to select only txt files
  • 1.2 - Using os.path.isfile to avoid directories in the list
  • 4.1 - python 2.7 - os.walk('.')
  • Example of use of os.walk('.') to count how many files there are in a directory and its subdirectories (for python 3.5 and 2.7)
>>> import os
>>> arr = os.listdir()
>>> arr
['$RECYCLE.BIN', 'work.txt', '3ebooks.txt', 'documents']
>>> arr_txt = [x for x in os.listdir() if x.endswith(".txt")]
>>> print(arr_txt)
['work.txt', '3ebooks.txt']
import os.path

listOfFiles = [f for f in os.listdir() if os.path.isfile(f)]

print(listOfFiles)

There are only files here

import pathlib

>>> flist = []
>>> for p in pathlib.Path('.').iterdir():
...  if p.is_file():
...   print(p)
...   flist.append(p)
...
error.PNG
exemaker.bat
guiprova.mp3
setup.py
speak_gui2.py
thumb.PNG

If you want to use list comprehension

>>> flist = [p for p in pathlib.Path('.').iterdir() if p.is_file()]

To include all the files in the subdirectories (in this example there are 11 files in the first directory and 3 in a subdirectory), I will use os.walk(), which works well in Python 3.5 and newer versions:

import os
x = [i[2] for i in os.walk('.')]
y=[]
for t in x:
    for f in t:
        y.append(f)
print(y)
# print y # for 2.7 uncomment this and comment the previous line
>>> import os
>>> x = next(os.walk('F://python'))[2] # for the current dir use ('.')
>>> ['calculator.bat','calculator.py']

When you use next(os.walk('.')), you get the same results as os.listdir(), but with the root as the first item of the tuple, all the folders in the second item and all the files in the third, while os.listdir() returns folders and files in the same list. In both cases (next(os.walk('.')) and os.listdir()) you only look in the current directory, leaving the subdirectories alone (you must iterate over os.walk('.') for that, as we showed before).

>>> import os
>>> x = [f.name for f in os.scandir() if f.is_file()]
>>> x
['calculator.bat','calculator.py']

Another example with scandir (a little variation from docs.python.org) This one is more efficient than os.listdir. In this case, it shows the files only in the current directory where the script is executed.

>>> import os
>>> with os.scandir() as i:
...  for entry in i:
...   if entry.is_file():
...    print(entry.name)
...
ebookmaker.py
error.PNG
exemaker.bat
guiprova.mp3
setup.py
speakgui4.py
speak_gui2.py
speak_gui3.py
thumb.PNG
>>>
>>> import os
>>> mylist = os.listdir(os.getcwd())
>>> mylist
['$RECYCLE.BIN', 'work.txt', '3ebooks.txt', 'documents']
>>> for f in os.listdir('..'):
...     print f


>>> for f in os.listdir('/'):
...     print f

It's the same as in Python 3 (except the print)

>>> x = os.listdir('F:/python')
>>> for files in x:
>>>    print files
...
$RECYCLE.BIN
work.txt
3ebooks.txt
documents

5.1 - python 2 - os.walk('.')

Let's make an example for python 2.7 with walk (same as python 3).

>>> def getAllFiles(dir):
...     """Get all the files in the dir and subdirs"""
...     allfiles = []
...     for pack in os.walk(dir):
...         for files in pack[2]:
...             if os.path.isfile(files):
...                 allfiles += [files]
...     return allfiles
...
>>> getAllFiles("F://python")
['first.py', 'Modules.txt', 'test4Console.py', 'text4Console.bat', 'tkinter001.py']

In this example, we count the number of files contained in a directory and all of its subdirectories.

import os    

def count(dir, counter=0):
    """Return a summary string with the number of files in dir and subdirs.

    Args:
        dir: Path of the directory to walk recursively.
        counter: Starting value that the number of files found is added to.

    Returns:
        A string of the form "<dir> : <total> files".
    """
    # os.walk yields (dirpath, dirnames, filenames); only filenames are counted.
    counter += sum(len(filenames) for _, _, filenames in os.walk(dir))
    return dir + " : " + str(counter) + " files"


print(count("F:\\python"))
>>> import glob
>>> glob.glob("*.txt")
['ale.txt', 'alunni2015.txt', 'assenze.text.txt', 'text2.txt', 'untitled.txt']

You should include the path argument to listdir.

I agree, but I did not notice something also, that python2 requires the argument whilst python3 is optional, If you improve the answer for both python versions would be great :)

Ok, I went into Python 2 and find the differences and I edited the post.

python - How do I list all files of a directory? - Stack Overflow

python directory
Rectangle 27 148

How to get a list of files in Python 2, 3, 3.4, 3.5

Here is a list of what I talked about in this answer:

  • 1.1 - Use of list comprehension to select only txt files
  • 1.2 - Using os.path.isfile to avoid directories in the list
  • 4.1 - python 2.7 - os.walk('.')
  • Example of use of os.walk('.') to count how many files there are in a directory and its subdirectories (for python 3.5 and 2.7)
>>> import os
>>> arr = os.listdir()
>>> arr
['$RECYCLE.BIN', 'work.txt', '3ebooks.txt', 'documents']
>>> arr_txt = [x for x in os.listdir() if x.endswith(".txt")]
>>> print(arr_txt)
['work.txt', '3ebooks.txt']
import os.path

listOfFiles = [f for f in os.listdir() if os.path.isfile(f)]

print(listOfFiles)

There are only files here

import pathlib

>>> flist = []
>>> for p in pathlib.Path('.').iterdir():
...  if p.is_file():
...   print(p)
...   flist.append(p)
...
error.PNG
exemaker.bat
guiprova.mp3
setup.py
speak_gui2.py
thumb.PNG

If you want to use list comprehension

>>> flist = [p for p in pathlib.Path('.').iterdir() if p.is_file()]

To include all the files in the subdirectories (in this example there are 11 files in the first directory and 3 in a subdirectory), I will use os.walk(), which works well in Python 3.5 and newer versions:

import os
x = [i[2] for i in os.walk('.')]
y=[]
for t in x:
    for f in t:
        y.append(f)
print(y)
# print y # for 2.7 uncomment this and comment the previous line
>>> import os
>>> x = next(os.walk('F://python'))[2] # for the current dir use ('.')
>>> ['calculator.bat','calculator.py']

When you use next(os.walk('.')), you get the same results as os.listdir(), but with the root as the first item of the tuple, all the folders in the second item and all the files in the third, while os.listdir() returns folders and files in the same list. In both cases (next(os.walk('.')) and os.listdir()) you only look in the current directory, leaving the subdirectories alone (you must iterate over os.walk('.') for that, as we showed before).

>>> import os
>>> x = [f.name for f in os.scandir() if f.is_file()]
>>> x
['calculator.bat','calculator.py']

Another example with scandir (a little variation from docs.python.org) This one is more efficient than os.listdir. In this case, it shows the files only in the current directory where the script is executed.

>>> import os
>>> with os.scandir() as i:
...  for entry in i:
...   if entry.is_file():
...    print(entry.name)
...
ebookmaker.py
error.PNG
exemaker.bat
guiprova.mp3
setup.py
speakgui4.py
speak_gui2.py
speak_gui3.py
thumb.PNG
>>>
>>> import os
>>> mylist = os.listdir(os.getcwd())
>>> mylist
['$RECYCLE.BIN', 'work.txt', '3ebooks.txt', 'documents']
>>> for f in os.listdir('..'):
...     print f


>>> for f in os.listdir('/'):
...     print f

It's the same as in Python 3 (except the print)

>>> x = os.listdir('F:/python')
>>> for files in x:
>>>    print files
...
$RECYCLE.BIN
work.txt
3ebooks.txt
documents

5.1 - python 2 - os.walk('.')

Let's make an example for python 2.7 with walk (same as python 3).

>>> def getAllFiles(dir):
...     """Get all the files in the dir and subdirs"""
...     allfiles = []
...     for pack in os.walk(dir):
...         for files in pack[2]:
...             if os.path.isfile(files):
...                 allfiles += [files]
...     return allfiles
...
>>> getAllFiles("F://python")
['first.py', 'Modules.txt', 'test4Console.py', 'text4Console.bat', 'tkinter001.py']

In this example, we count the number of files contained in a directory and all of its subdirectories.

import os    

def count(dir, counter=0):
    """Return a summary string with the number of files in dir and subdirs.

    Args:
        dir: Path of the directory to walk recursively.
        counter: Starting value that the number of files found is added to.

    Returns:
        A string of the form "<dir> : <total> files".
    """
    # os.walk yields (dirpath, dirnames, filenames); only filenames are counted.
    counter += sum(len(filenames) for _, _, filenames in os.walk(dir))
    return dir + " : " + str(counter) + " files"


print(count("F:\\python"))
>>> import glob
>>> glob.glob("*.txt")
['ale.txt', 'alunni2015.txt', 'assenze.text.txt', 'text2.txt', 'untitled.txt']

You should include the path argument to listdir.

I agree, but I did not notice something also, that python2 requires the argument whilst python3 is optional, If you improve the answer for both python versions would be great :)

Ok, I went into Python 2 and find the differences and I edited the post.

python - How do I list all files of a directory? - Stack Overflow

python directory
Rectangle 27 3

If the merged CSV is going to be used in Python then just use glob to get a list of the files to pass to fileinput.input() via the files argument, then use the csv module to read it all in one go.

how to merge 200 csv files in Python - Stack Overflow

python csv merge
Rectangle 27 3

If the merged CSV is going to be used in Python then just use glob to get a list of the files to pass to fileinput.input() via the files argument, then use the csv module to read it all in one go.

how to merge 200 csv files in Python - Stack Overflow

python csv merge
Rectangle 27 7

Because the * is a shell construct. Python is literally looking for a file named "*" in the directory /home/me/test. Use listdir to get a list of the files first and then call remove on each one.

python - Remove all files in a directory - Stack Overflow

python unix
Rectangle 27 2

The following is based on Python's os.walk function, which yields tuples of the form (dirname, dirs, files). Since this is Ruby, you get a list of arrays of the form [dirname, dirs, files]. This should be easier to process than trying to recursively walk the directory yourself. To run the code, you'll need to provide a demo_folder.

# Recursively lists a directory tree.
#
# Returns an array with one [dirname, dirs, files] entry per directory, where
# dirname is the expanded path and dirs/files are the entry names directly
# inside it. A directory's entry is appended after those of its
# subdirectories, so the entry for +dir+ itself comes last.
def walk(dir)
  dir_list = []

  # A local recursive lambda is used instead of a nested `def`: a nested def
  # is not a closure and would (re)define a global method on every call.
  visit = lambda do |current|
    dirname = File.expand_path(current)
    dirs = []
    files = []

    Dir.entries(current).each do |fn|
      # Skip the self and parent pseudo-entries.
      next if fn == "." || fn == ".."

      path_fn = File.join(dirname, fn)
      if File.directory?(path_fn)
        dirs << fn
        visit.call(path_fn)
      else
        files << fn
      end
    end

    dir_list << [dirname, dirs, files]
  end

  visit.call(dir)
  dir_list
end

# Demo entry point: runs only when this file is executed directly,
# not when it is loaded from another script.
if __FILE__ == $0

  require 'json'

  # Walk the sample folder and print the resulting array as formatted JSON.
  dir_list = walk('demo_folder')
  puts JSON.pretty_generate(dir_list)
end

Traversing directories and reading from files in ruby on rails - Stack...

ruby-on-rails ruby ruby-on-rails-3
Rectangle 27 4

In Python 2.X print is a keyword as you can see in this link. However, in Python 3.X print becomes a function, so the correct way to do it is print(something). You can get the list of keywords for each version by executing the following:

>>> import keyword
>>> keyword.kwlist

Syntax error on print with Python 3 - Stack Overflow

python python-3.x
Rectangle 27 2

Read all the lines in your command file to get a list of Python script file names and their arguments, e.g. script: "C:\hello.py" and argument: "word".

call them in below code style

from subprocess import call
# Each argv element must be its own list item: the interpreter, the script
# path (a raw string, so the backslash is not read as an escape), then the
# script's arguments.
call(["python", r"C:\hello.py", "word"])
......

Python file to open a text file and run other python files in the text...

python python-2.7 windows-shell
Rectangle 27 2

Read all the lines in your command file to get a list of Python script file names and their arguments, e.g. script: "C:\hello.py" and argument: "word".

call them in below code style

from subprocess import call
# Each argv element must be its own list item: the interpreter, the script
# path (a raw string, so the backslash is not read as an escape), then the
# script's arguments.
call(["python", r"C:\hello.py", "word"])
......

Python file to open a text file and run other python files in the text...

python python-2.7 windows-shell
Rectangle 27 2

You could use map to apply a function to each element of a list and get the resulting list (Python 2.x) / iterable (Python 3.x) back.

map(int, a)

It could be done with list comprehension too.

[int(x) for x in a]

Lists in Python - Stack Overflow

python list transformation
Rectangle 27 408

If we run the aforementioned solution from /tmp

My 50 cents for getting a pip freeze-like list from a Python script:

import pip
installed_packages = pip.get_installed_distributions()
installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
     for i in installed_packages])
print(installed_packages_list)

As a (too long) one liner:

sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['behave==1.2.4', 'enum34==1.0', 'flask==0.10.1', 'itsdangerous==0.24', 
 'jinja2==2.7.2', 'jsonschema==2.3.0', 'markupsafe==0.23', 'nose==1.3.3', 
 'parse-type==0.3.4', 'parse==1.6.4', 'prettytable==0.7.2', 'requests==2.3.0',
 'six==1.6.1', 'vioozer-metadata==0.1', 'vioozer-users-server==0.1', 
 'werkzeug==0.9.4']

This solution applies to the system scope or to a virtual environment scope, and covers packages installed by setuptools, pip and (god forbid) easy_install.

I added the result of this call to my flask server, so when I call it with http://example.com/exampleServer/environment I get the list of packages installed on the server's virtualenv. It makes debugging a whole lot easier.

I have noticed a strange behaviour of this technique - when the Python interpreter is invoked in the same directory as a setup.py file, it does not list the package installed by setup.py.

$ cd /tmp
$ virtualenv test_env
New python executable in test_env/bin/python
Installing setuptools, pip...done.
$ source test_env/bin/activate
(test_env) $
(test_env) $ git clone https://github.com/behave/behave.git
Cloning into 'behave'...
remote: Reusing existing pack: 4350, done.
remote: Total 4350 (delta 0), reused 0 (delta 0)
Receiving objects: 100% (4350/4350), 1.85 MiB | 418.00 KiB/s, done.
Resolving deltas: 100% (2388/2388), done.
Checking connectivity... done.
setup.py
/tmp/behave
(test_env) $ ls /tmp/behave/setup.py
/tmp/behave/setup.py
(test_env) $ cd /tmp/behave && python setup.py install
running install
...
Installed /private/tmp/test_env/lib/python2.7/site-packages/enum34-1.0-py2.7.egg
Finished processing dependencies for behave==1.2.5a1
>>> import pip
>>> sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['enum34==1.0', 'parse-type==0.3.4', 'parse==1.6.4', 'six==1.6.1']
>>> import os
>>> os.getcwd()
'/private/tmp/behave'

behave==1.2.5a1 is missing from the second example, because the working directory contains behave's setup.py file.

I could not find any reference to this issue in the documentation. Perhaps I shall open a bug for it.

Thank you for this answer! I think it better answers the question because I ask "locally" installed Python modules. Pip freeze is also not always the way to go. This works better - I think.

@Masi Just added a detailed explanation of the caveat of this solution. It is indeed a strange one.

@AdamMatan Actually, if you're using multiple python installs with site-packages in several directories, this will fail. It works assuming all your installs have been registered on the same python instance (via pip/easy_install/setuptools) but fails if you've modified your python path to include other installs (or custom libraries). For a normal workflow, this is probably fine, but it may not be robust.

@AdamMatan yes, entire directories can be added manually to $PYTHONPATH (or equivalently sys.path) and this method will never notice them. In a clean setup, this probably won't happen, but my work has severe permissions restrictions so many of us run on a mashup of "canonical environment" with proprietary libs and "custom environment" where we have complete permissions. When I tested your method on either environment packages from the other didn't show up.

How can I get a list of locally installed Python modules? - Stack Over...

python module pip
Rectangle 27 402

If we run the aforementioned solution from /tmp

My 50 cents for getting a pip freeze-like list from a Python script:

import pip
installed_packages = pip.get_installed_distributions()
installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
     for i in installed_packages])
print(installed_packages_list)

As a (too long) one liner:

sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['behave==1.2.4', 'enum34==1.0', 'flask==0.10.1', 'itsdangerous==0.24', 
 'jinja2==2.7.2', 'jsonschema==2.3.0', 'markupsafe==0.23', 'nose==1.3.3', 
 'parse-type==0.3.4', 'parse==1.6.4', 'prettytable==0.7.2', 'requests==2.3.0',
 'six==1.6.1', 'vioozer-metadata==0.1', 'vioozer-users-server==0.1', 
 'werkzeug==0.9.4']

This solution applies to the system scope or to a virtual environment scope, and covers packages installed by setuptools, pip and (god forbid) easy_install.

I added the result of this call to my flask server, so when I call it with http://example.com/exampleServer/environment I get the list of packages installed on the server's virtualenv. It makes debugging a whole lot easier.

I have noticed a strange behaviour of this technique - when the Python interpreter is invoked in the same directory as a setup.py file, it does not list the package installed by setup.py.

$ cd /tmp
$ virtualenv test_env
New python executable in test_env/bin/python
Installing setuptools, pip...done.
$ source test_env/bin/activate
(test_env) $
(test_env) $ git clone https://github.com/behave/behave.git
Cloning into 'behave'...
remote: Reusing existing pack: 4350, done.
remote: Total 4350 (delta 0), reused 0 (delta 0)
Receiving objects: 100% (4350/4350), 1.85 MiB | 418.00 KiB/s, done.
Resolving deltas: 100% (2388/2388), done.
Checking connectivity... done.
setup.py
/tmp/behave
(test_env) $ ls /tmp/behave/setup.py
/tmp/behave/setup.py
(test_env) $ cd /tmp/behave && python setup.py install
running install
...
Installed /private/tmp/test_env/lib/python2.7/site-packages/enum34-1.0-py2.7.egg
Finished processing dependencies for behave==1.2.5a1
>>> import pip
>>> sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['enum34==1.0', 'parse-type==0.3.4', 'parse==1.6.4', 'six==1.6.1']
>>> import os
>>> os.getcwd()
'/private/tmp/behave'

behave==1.2.5a1 is missing from the second example, because the working directory contains behave's setup.py file.

I could not find any reference to this issue in the documentation. Perhaps I shall open a bug for it.

Thank you for this answer! I think it better answers the question because I ask "locally" installed Python modules. Pip freeze is also not always the way to go. This works better - I think.

@Masi Just added a detailed explanation of the caveat of this solution. It is indeed a strange one.

@AdamMatan Actually, if you're using multiple python installs with site-packages in several directories, this will fail. It works assuming all your installs have been registered on the same python instance (via pip/easy_install/setuptools) but fails if you've modified your python path to include other installs (or custom libraries). For a normal workflow, this is probably fine, but it may not be robust.

@AdamMatan yes, entire directories can be added manually to $PYTHONPATH (or equivalently sys.path) and this method will never notice them. In a clean setup, this probably won't happen, but my work has severe permissions restrictions so many of us run on a mashup of "canonical environment" with proprietary libs and "custom environment" where we have complete permissions. When I tested your method on either environment packages from the other didn't show up.

How can I get a list of locally installed Python modules? - Stack Over...

python module pip
Rectangle 27 400

If we run the aforementioned solution from /tmp

My 50 cents for getting a pip freeze-like list from a Python script:

import pip
installed_packages = pip.get_installed_distributions()
installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
     for i in installed_packages])
print(installed_packages_list)

As a (too long) one liner:

sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['behave==1.2.4', 'enum34==1.0', 'flask==0.10.1', 'itsdangerous==0.24', 
 'jinja2==2.7.2', 'jsonschema==2.3.0', 'markupsafe==0.23', 'nose==1.3.3', 
 'parse-type==0.3.4', 'parse==1.6.4', 'prettytable==0.7.2', 'requests==2.3.0',
 'six==1.6.1', 'vioozer-metadata==0.1', 'vioozer-users-server==0.1', 
 'werkzeug==0.9.4']

This solution applies to the system scope or to a virtual environment scope, and covers packages installed by setuptools, pip and (god forbid) easy_install.

I added the result of this call to my flask server, so when I call it with http://example.com/exampleServer/environment I get the list of packages installed on the server's virtualenv. It makes debugging a whole lot easier.

I have noticed a strange behaviour of this technique - when the Python interpreter is invoked in the same directory as a setup.py file, it does not list the package installed by setup.py.

$ cd /tmp
$ virtualenv test_env
New python executable in test_env/bin/python
Installing setuptools, pip...done.
$ source test_env/bin/activate
(test_env) $
(test_env) $ git clone https://github.com/behave/behave.git
Cloning into 'behave'...
remote: Reusing existing pack: 4350, done.
remote: Total 4350 (delta 0), reused 0 (delta 0)
Receiving objects: 100% (4350/4350), 1.85 MiB | 418.00 KiB/s, done.
Resolving deltas: 100% (2388/2388), done.
Checking connectivity... done.
setup.py
/tmp/behave
(test_env) $ ls /tmp/behave/setup.py
/tmp/behave/setup.py
(test_env) $ cd /tmp/behave && python setup.py install
running install
...
Installed /private/tmp/test_env/lib/python2.7/site-packages/enum34-1.0-py2.7.egg
Finished processing dependencies for behave==1.2.5a1
>>> import pip
>>> sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['enum34==1.0', 'parse-type==0.3.4', 'parse==1.6.4', 'six==1.6.1']
>>> import os
>>> os.getcwd()
'/private/tmp/behave'

behave==1.2.5a1 is missing from the second example, because the working directory contains behave's setup.py file.

I could not find any reference to this issue in the documentation. Perhaps I shall open a bug for it.

Thank you for this answer! I think it better answers the question because I ask "locally" installed Python modules. Pip freeze is also not always the way to go. This works better - I think.

@Masi Just added a detailed explanation of the caveat of this solution. It is indeed a strange one.

@AdamMatan Actually, if you're using multiple python installs with site-packages in several directories, this will fail. It works assuming all your installs have been registered on the same python instance (via pip/easy_install/setuptools) but fails if you've modified your python path to include other installs (or custom libraries). For a normal workflow, this is probably fine, but it may not be robust.

@AdamMatan yes, entire directories can be added manually to $PYTHONPATH (or equivalently sys.path) and this method will never notice them. In a clean setup, this probably won't happen, but my work has severe permissions restrictions so many of us run on a mashup of "canonical environment" with proprietary libs and "custom environment" where we have complete permissions. When I tested your method on either environment packages from the other didn't show up.

How can I get a list of locally installed Python modules? - Stack Over...

python module pip
Rectangle 27 400

If we run the aforementioned solution from /tmp

My 50 cents for getting a pip freeze-like list from a Python script:

import pip
installed_packages = pip.get_installed_distributions()
installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
     for i in installed_packages])
print(installed_packages_list)

As a (too long) one liner:

sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['behave==1.2.4', 'enum34==1.0', 'flask==0.10.1', 'itsdangerous==0.24', 
 'jinja2==2.7.2', 'jsonschema==2.3.0', 'markupsafe==0.23', 'nose==1.3.3', 
 'parse-type==0.3.4', 'parse==1.6.4', 'prettytable==0.7.2', 'requests==2.3.0',
 'six==1.6.1', 'vioozer-metadata==0.1', 'vioozer-users-server==0.1', 
 'werkzeug==0.9.4']

This solution applies to the system scope or to a virtual environment scope, and covers packages installed by setuptools, pip and (god forbid) easy_install.

I added the result of this call to my flask server, so when I call it with http://example.com/exampleServer/environment I get the list of packages installed on the server's virtualenv. It makes debugging a whole lot easier.

I have noticed a strange behaviour of this technique - when the Python interpreter is invoked in the same directory as a setup.py file, it does not list the package installed by setup.py.

$ cd /tmp
$ virtualenv test_env
New python executable in test_env/bin/python
Installing setuptools, pip...done.
$ source test_env/bin/activate
(test_env) $
(test_env) $ git clone https://github.com/behave/behave.git
Cloning into 'behave'...
remote: Reusing existing pack: 4350, done.
remote: Total 4350 (delta 0), reused 0 (delta 0)
Receiving objects: 100% (4350/4350), 1.85 MiB | 418.00 KiB/s, done.
Resolving deltas: 100% (2388/2388), done.
Checking connectivity... done.
setup.py
/tmp/behave
(test_env) $ ls /tmp/behave/setup.py
/tmp/behave/setup.py
(test_env) $ cd /tmp/behave && python setup.py install
running install
...
Installed /private/tmp/test_env/lib/python2.7/site-packages/enum34-1.0-py2.7.egg
Finished processing dependencies for behave==1.2.5a1
>>> import pip
>>> sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])
['enum34==1.0', 'parse-type==0.3.4', 'parse==1.6.4', 'six==1.6.1']
>>> import os
>>> os.getcwd()
'/private/tmp/behave'

behave==1.2.5a1 is missing from the second example, because the working directory contains behave's setup.py file.

I could not find any reference to this issue in the documentation. Perhaps I shall open a bug for it.

Thank you for this answer! I think it better answers the question because I ask "locally" installed Python modules. Pip freeze is also not always the way to go. This works better - I think.

@Masi Just added a detailed explanation of the caveat of this solution. It is indeed a strange one.

@AdamMatan Actually, if you're using multiple python installs with site-packages in several directories, this will fail. It works assuming all your installs have been registered on the same python instance (via pip/easy_install/setuptools) but fails if you've modified your python path to include other installs (or custom libraries). For a normal workflow, this is probably fine, but it may not be robust.

@AdamMatan yes, entire directories can be added manually to $PYTHONPATH (or equivalently sys.path) and this method will never notice them. In a clean setup, this probably won't happen, but my work has severe permissions restrictions so many of us run on a mashup of "canonical environment" with proprietary libs and "custom environment" where we have complete permissions. When I tested your method on either environment packages from the other didn't show up.

How can I get a list of locally installed Python modules? - Stack Over...

python module pip
Rectangle 27 116

There is the dir(theobject) function, which lists the names of all the fields and methods of your object (as a list), and the inspect module (as codeape wrote), which lists the fields and methods along with their docstrings (in """).

Because everything (even fields) might be called in Python, I'm not sure there is a built-in function to list only methods. You might want to try if the object you get through dir is callable or not.

How do I get list of methods in a Python class? - Stack Overflow

python
Rectangle 27 71

How to get the size of a list?

To find the size of a list, use the builtin function, len:

items = []
items.append("apple")
items.append("orange")
items.append("banana")
len(items)
len(s)

Return the length (the number of items) of an object. The argument may be a sequence (such as a string, bytes, tuple, list, or range) or a collection (such as a dictionary, set, or frozen set).

len is implemented with __len__, from the data model docs:

object.__len__(self)

Called to implement the built-in function len(). Should return the length of the object, an integer >= 0. Also, an object that doesn't define a __nonzero__() [in Python 2 or __bool__() in Python 3] method and whose __len__() method returns zero is considered to be false in a Boolean context.

And we can also see that __len__ is a method of lists:

items.__len__()

And in fact we see we can get this information for all of the described types:

>>> all(hasattr(cls, '__len__') for cls in (str, bytes, tuple, list, 
                                            xrange, dict, set, frozenset))
True

This should be the accepted answer. Nice simple code.

How to get the number of elements in a list in Python? - Stack Overflo...

python list
Rectangle 27 70

How to get the size of a list?

To find the size of a list, use the builtin function, len:

items = []
items.append("apple")
items.append("orange")
items.append("banana")
len(items)
len(s)

Return the length (the number of items) of an object. The argument may be a sequence (such as a string, bytes, tuple, list, or range) or a collection (such as a dictionary, set, or frozen set).

len is implemented with __len__, from the data model docs:

object.__len__(self)

Called to implement the built-in function len(). Should return the length of the object, an integer >= 0. Also, an object that doesn't define a __nonzero__() [in Python 2 or __bool__() in Python 3] method and whose __len__() method returns zero is considered to be false in a Boolean context.

And we can also see that __len__ is a method of lists:

items.__len__()

And in fact we see we can get this information for all of the described types:

>>> all(hasattr(cls, '__len__') for cls in (str, bytes, tuple, list, 
                                            xrange, dict, set, frozenset))
True

This should be the accepted answer. Nice simple code.

How to get the number of elements in a list in Python? - Stack Overflo...

python list
Rectangle 27 15

After looking at pip's code for a while, it looks like the code responsible for locating packages can be found in the PackageFinder class in pip.index. Its method find_requirement looks up the versions of an InstallRequirement, but unfortunately only returns the most recent version.

The code below is almost a 1:1 copy of the original function, with the return in line 114 changed to return all versions.

The script expects one package name as first and only argument and returns all versions.

I can't guarantee for the correctness, as I'm not familiar with pip's code.

$ python test.py pip
Versions of pip
0.8.2
0.8.1
0.8
0.7.2
0.7.1
0.7
0.6.3
0.6.2
0.6.1
0.6
0.5.1
0.5
0.4
0.3.1
0.3
0.2.1
0.2 dev
import posixpath
import pkg_resources
import sys
from pip.download import url_to_path
from pip.exceptions import DistributionNotFound
from pip.index import PackageFinder, Link
from pip.log import logger
from pip.req import InstallRequirement
from pip.util import Inf


class MyPackageFinder(PackageFinder):
    """PackageFinder whose ``find_requirement`` returns every matching version.

    The method body is a near-verbatim copy of the parent implementation;
    the only intended difference is the final ``return``, which hands back
    the whole sorted candidate list instead of a single entry.
    """

    def find_requirement(self, req, upgrade):
        """Collect all versions of ``req`` that can be found, newest first.

        Args:
            req: The requirement to look up. The code reads its ``url_name``,
                ``name``, ``absolute_versions``, ``satisfied_by`` and ``req``
                attributes (presumably a pip ``InstallRequirement``).
            upgrade: When false and an already-installed distribution
                satisfies the requirement, the search result is discarded
                and ``None`` is returned.

        Returns:
            A list of ``(link, version)`` tuples sorted by
            ``pkg_resources.parse_version(version)`` in descending order,
            or ``None`` when the installed distribution is sufficient
            (see ``upgrade``) or is itself the newest candidate.

        Raises:
            DistributionNotFound: If no candidates are found at all, or none
                of the candidates matches ``req.req``.
        """
        url_name = req.url_name
        # Only check main index if index URL is given:
        main_index_url = None
        if self.index_urls:
            # Check that we have the url_name correctly spelled:
            main_index_url = Link(posixpath.join(self.index_urls[0], url_name))
            # This will also cache the page, so it's okay that we get it again later:
            page = self._get_page(main_index_url, req)
            if page is None:
                url_name = self._find_url_name(Link(self.index_urls[0]), url_name, req) or req.url_name

        # Combine index URLs with mirror URLs here to allow
        # adding more index URLs from requirements files
        all_index_urls = self.index_urls + self.mirror_urls

        def mkurl_pypi_url(url):
            loc = posixpath.join(url, url_name)
            # For maximum compatibility with easy_install, ensure the path
            # ends in a trailing slash.  Although this isn't in the spec
            # (and PyPI can handle it without the slash) some other index
            # implementations might break if they relied on easy_install's behavior.
            if not loc.endswith('/'):
                loc = loc + '/'
            return loc
        if url_name is not None:
            locations = [
                mkurl_pypi_url(url)
                for url in all_index_urls] + self.find_links
        else:
            locations = list(self.find_links)
        locations.extend(self.dependency_links)
        # Version-specific pages on the main index are searched first.
        for version in req.absolute_versions:
            if url_name is not None and main_index_url is not None:
                locations = [
                    posixpath.join(main_index_url.url, version)] + locations

        file_locations, url_locations = self._sort_locations(locations)

        locations = [Link(url) for url in url_locations]
        logger.debug('URLs to search for versions for %s:' % req)
        for location in locations:
            logger.debug('* %s' % location)
        # Candidates are gathered from four sources: find_links, index pages,
        # dependency links and local files. Each candidate is a
        # (parsed_version, link, version) tuple.
        found_versions = []
        found_versions.extend(
            self._package_versions(
                [Link(url, '-f') for url in self.find_links], req.name.lower()))
        page_versions = []
        for page in self._get_pages(locations, req):
            logger.debug('Analyzing links from page %s' % page.url)
            logger.indent += 2
            try:
                page_versions.extend(self._package_versions(page.links, req.name.lower()))
            finally:
                logger.indent -= 2
        dependency_versions = list(self._package_versions(
            [Link(url) for url in self.dependency_links], req.name.lower()))
        if dependency_versions:
            logger.info('dependency_links found: %s' % ', '.join([link.url for parsed, link, version in dependency_versions]))
        file_versions = list(self._package_versions(
                [Link(url) for url in file_locations], req.name.lower()))
        if not found_versions and not page_versions and not dependency_versions and not file_versions:
            logger.fatal('Could not find any downloads that satisfy the requirement %s' % req)
            raise DistributionNotFound('No distributions at all found for %s' % req)
        if req.satisfied_by is not None:
            # The already-installed distribution takes part as a candidate;
            # Inf in the link slot marks it as "installed" further down.
            found_versions.append((req.satisfied_by.parsed_version, Inf, req.satisfied_by.version))
        if file_versions:
            file_versions.sort(reverse=True)
            logger.info('Local files found: %s' % ', '.join([url_to_path(link.url) for parsed, link, version in file_versions]))
            found_versions = file_versions + found_versions
        all_versions = found_versions + page_versions + dependency_versions
        # Keep only candidates whose version matches the requirement, reduced
        # to (link, version) tuples.
        applicable_versions = []
        for (parsed_version, link, version) in all_versions:
            if version not in req.req:
                logger.info("Ignoring link %s, version %s doesn't match %s"
                            % (link, version, ','.join([''.join(s) for s in req.req.specs])))
                continue
            applicable_versions.append((link, version))
        applicable_versions = sorted(applicable_versions, key=lambda v: pkg_resources.parse_version(v[1]), reverse=True)
        existing_applicable = bool([link for link, version in applicable_versions if link is Inf])
        if not upgrade and existing_applicable:
            # Entries are (link, version) and the installed marker Inf lives
            # in the link slot, so index 0 (not 1) has to be tested here.
            if applicable_versions[0][0] is Inf:
                logger.info('Existing installed version (%s) is most up-to-date and satisfies requirement'
                            % req.satisfied_by.version)
            else:
                logger.info('Existing installed version (%s) satisfies requirement (most up-to-date version is %s)'
                            % (req.satisfied_by.version, applicable_versions[0][1]))
            return None
        if not applicable_versions:
            logger.fatal('Could not find a version that satisfies the requirement %s (from versions: %s)'
                         % (req, ', '.join([version for parsed_version, link, version in found_versions])))
            raise DistributionNotFound('No distributions matching the version for %s' % req)
        if applicable_versions[0][0] is Inf:
            # We have an existing version, and it's the best version
            logger.info('Installed version (%s) is most up-to-date (past versions: %s)'
                        % (req.satisfied_by.version, ', '.join([version for link, version in applicable_versions[1:]]) or 'none'))
            return None
        if len(applicable_versions) > 1:
            logger.info('Using version %s (newest of versions: %s)' %
                        (applicable_versions[0][1], ', '.join([version for link, version in applicable_versions])))
        # Return the full sorted list rather than a single entry.
        return applicable_versions


if __name__ == '__main__':
    # The package name is the single required command-line argument; exit
    # with a usage hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <package-name>' % sys.argv[0])
    req = InstallRequirement.from_line(sys.argv[1], None)
    finder = MyPackageFinder([], ['http://pypi.python.org/simple/'])
    versions = finder.find_requirement(req, False)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Versions of %s' % sys.argv[1])
    # find_requirement may return None (installed version already
    # sufficient); treat that as "nothing to list" rather than iterating None.
    for v in versions or []:
        # Each entry is a (link, version) tuple; show the version only.
        print(v[1])

This worked a whole lot better than the answer above. skinny $ yolk -V scipy scipy 0.12.0 skinny $ python test.py scipy Versions of scipy 0.12.0 0.12.0 0.11.0 0.11.0 0.10.1 0.10.1 0.10.0 0.10.0 0.9.0 0.9.0 0.8.0

This no longer works (at least not with pip 6.0.6)

virtualenv - Python and pip, list all versions of a package that's ava...

python virtualenv pip
Rectangle 27 15

After looking at pip's code for a while, it looks like the code responsible for locating packages can be found in the PackageFinder class in pip.index. Its method find_requirement looks up the versions of an InstallRequirement, but unfortunately only returns the most recent version.

The code below is almost a 1:1 copy of the original function, with the return in line 114 changed to return all versions.

The script expects one package name as first and only argument and returns all versions.

I can't guarantee for the correctness, as I'm not familiar with pip's code.

$ python test.py pip
Versions of pip
0.8.2
0.8.1
0.8
0.7.2
0.7.1
0.7
0.6.3
0.6.2
0.6.1
0.6
0.5.1
0.5
0.4
0.3.1
0.3
0.2.1
0.2 dev
import posixpath
import pkg_resources
import sys
from pip.download import url_to_path
from pip.exceptions import DistributionNotFound
from pip.index import PackageFinder, Link
from pip.log import logger
from pip.req import InstallRequirement
from pip.util import Inf


class MyPackageFinder(PackageFinder):
    """PackageFinder whose ``find_requirement`` returns every matching version.

    The method body is a near-verbatim copy of the parent implementation;
    the only intended difference is the final ``return``, which hands back
    the whole sorted candidate list instead of a single entry.
    """

    def find_requirement(self, req, upgrade):
        """Collect all versions of ``req`` that can be found, newest first.

        Args:
            req: The requirement to look up. The code reads its ``url_name``,
                ``name``, ``absolute_versions``, ``satisfied_by`` and ``req``
                attributes (presumably a pip ``InstallRequirement``).
            upgrade: When false and an already-installed distribution
                satisfies the requirement, the search result is discarded
                and ``None`` is returned.

        Returns:
            A list of ``(link, version)`` tuples sorted by
            ``pkg_resources.parse_version(version)`` in descending order,
            or ``None`` when the installed distribution is sufficient
            (see ``upgrade``) or is itself the newest candidate.

        Raises:
            DistributionNotFound: If no candidates are found at all, or none
                of the candidates matches ``req.req``.
        """
        url_name = req.url_name
        # Only check main index if index URL is given:
        main_index_url = None
        if self.index_urls:
            # Check that we have the url_name correctly spelled:
            main_index_url = Link(posixpath.join(self.index_urls[0], url_name))
            # This will also cache the page, so it's okay that we get it again later:
            page = self._get_page(main_index_url, req)
            if page is None:
                url_name = self._find_url_name(Link(self.index_urls[0]), url_name, req) or req.url_name

        # Combine index URLs with mirror URLs here to allow
        # adding more index URLs from requirements files
        all_index_urls = self.index_urls + self.mirror_urls

        def mkurl_pypi_url(url):
            loc = posixpath.join(url, url_name)
            # For maximum compatibility with easy_install, ensure the path
            # ends in a trailing slash.  Although this isn't in the spec
            # (and PyPI can handle it without the slash) some other index
            # implementations might break if they relied on easy_install's behavior.
            if not loc.endswith('/'):
                loc = loc + '/'
            return loc
        if url_name is not None:
            locations = [
                mkurl_pypi_url(url)
                for url in all_index_urls] + self.find_links
        else:
            locations = list(self.find_links)
        locations.extend(self.dependency_links)
        # Version-specific pages on the main index are searched first.
        for version in req.absolute_versions:
            if url_name is not None and main_index_url is not None:
                locations = [
                    posixpath.join(main_index_url.url, version)] + locations

        file_locations, url_locations = self._sort_locations(locations)

        locations = [Link(url) for url in url_locations]
        logger.debug('URLs to search for versions for %s:' % req)
        for location in locations:
            logger.debug('* %s' % location)
        # Candidates are gathered from four sources: find_links, index pages,
        # dependency links and local files. Each candidate is a
        # (parsed_version, link, version) tuple.
        found_versions = []
        found_versions.extend(
            self._package_versions(
                [Link(url, '-f') for url in self.find_links], req.name.lower()))
        page_versions = []
        for page in self._get_pages(locations, req):
            logger.debug('Analyzing links from page %s' % page.url)
            logger.indent += 2
            try:
                page_versions.extend(self._package_versions(page.links, req.name.lower()))
            finally:
                logger.indent -= 2
        dependency_versions = list(self._package_versions(
            [Link(url) for url in self.dependency_links], req.name.lower()))
        if dependency_versions:
            logger.info('dependency_links found: %s' % ', '.join([link.url for parsed, link, version in dependency_versions]))
        file_versions = list(self._package_versions(
                [Link(url) for url in file_locations], req.name.lower()))
        if not found_versions and not page_versions and not dependency_versions and not file_versions:
            logger.fatal('Could not find any downloads that satisfy the requirement %s' % req)
            raise DistributionNotFound('No distributions at all found for %s' % req)
        if req.satisfied_by is not None:
            # The already-installed distribution takes part as a candidate;
            # Inf in the link slot marks it as "installed" further down.
            found_versions.append((req.satisfied_by.parsed_version, Inf, req.satisfied_by.version))
        if file_versions:
            file_versions.sort(reverse=True)
            logger.info('Local files found: %s' % ', '.join([url_to_path(link.url) for parsed, link, version in file_versions]))
            found_versions = file_versions + found_versions
        all_versions = found_versions + page_versions + dependency_versions
        # Keep only candidates whose version matches the requirement, reduced
        # to (link, version) tuples.
        applicable_versions = []
        for (parsed_version, link, version) in all_versions:
            if version not in req.req:
                logger.info("Ignoring link %s, version %s doesn't match %s"
                            % (link, version, ','.join([''.join(s) for s in req.req.specs])))
                continue
            applicable_versions.append((link, version))
        applicable_versions = sorted(applicable_versions, key=lambda v: pkg_resources.parse_version(v[1]), reverse=True)
        existing_applicable = bool([link for link, version in applicable_versions if link is Inf])
        if not upgrade and existing_applicable:
            # Entries are (link, version) and the installed marker Inf lives
            # in the link slot, so index 0 (not 1) has to be tested here.
            if applicable_versions[0][0] is Inf:
                logger.info('Existing installed version (%s) is most up-to-date and satisfies requirement'
                            % req.satisfied_by.version)
            else:
                logger.info('Existing installed version (%s) satisfies requirement (most up-to-date version is %s)'
                            % (req.satisfied_by.version, applicable_versions[0][1]))
            return None
        if not applicable_versions:
            logger.fatal('Could not find a version that satisfies the requirement %s (from versions: %s)'
                         % (req, ', '.join([version for parsed_version, link, version in found_versions])))
            raise DistributionNotFound('No distributions matching the version for %s' % req)
        if applicable_versions[0][0] is Inf:
            # We have an existing version, and it's the best version
            logger.info('Installed version (%s) is most up-to-date (past versions: %s)'
                        % (req.satisfied_by.version, ', '.join([version for link, version in applicable_versions[1:]]) or 'none'))
            return None
        if len(applicable_versions) > 1:
            logger.info('Using version %s (newest of versions: %s)' %
                        (applicable_versions[0][1], ', '.join([version for link, version in applicable_versions])))
        # Return the full sorted list rather than a single entry.
        return applicable_versions


if __name__ == '__main__':
    # The package name is the single required command-line argument; exit
    # with a usage hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <package-name>' % sys.argv[0])
    req = InstallRequirement.from_line(sys.argv[1], None)
    finder = MyPackageFinder([], ['http://pypi.python.org/simple/'])
    versions = finder.find_requirement(req, False)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Versions of %s' % sys.argv[1])
    # find_requirement may return None (installed version already
    # sufficient); treat that as "nothing to list" rather than iterating None.
    for v in versions or []:
        # Each entry is a (link, version) tuple; show the version only.
        print(v[1])

This worked a whole lot better than the answer above. skinny $ yolk -V scipy scipy 0.12.0 skinny $ python test.py scipy Versions of scipy 0.12.0 0.12.0 0.11.0 0.11.0 0.10.1 0.10.1 0.10.0 0.10.0 0.9.0 0.9.0 0.8.0

This no longer works (at least not with pip 6.0.6)

virtualenv - Python and pip, list all versions of a package that's ava...

python virtualenv pip