# Install the necessary dependencies

import os
import sys
import numpy as np
import pandas as pd
!{sys.executable} -m pip install --quiet jupyterlab_myst ipython

5.4.2. Data Selection#

5.4.2.1. Overview#

In this section, we’ll focus on how to slice, dice, and generally get and set subsets of Pandas objects.

5.4.2.2. Selection by label#

Whether a copy or a reference is returned for a setting operation may depend on the context. This is sometimes called chained assignment and should be avoided.

.loc is strict when you present slicers that are not compatible (or convertible) with the index type. For example, slicing a DatetimeIndex with integers raises a TypeError:

dfl = pd.DataFrame(np.random.randn(5, 4),
                   columns=list('ABCD'),
                   index=pd.date_range('20130101', periods=5))

dfl.loc[2:3]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 dfl.loc[2:3]

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
   1070 axis = self.axis or 0
   1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis)
   1288 if isinstance(key, slice):
   1289     self._validate_key(key, axis)
-> 1290     return self._get_slice_axis(key, axis=axis)
   1291 elif com.is_bool_indexer(key):
   1292     return self._getbool_axis(key, axis=axis)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis)
   1321     return obj.copy(deep=False)
   1323 labels = obj._get_axis(axis)
-> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
   1326 if isinstance(indexer, slice):
   1327     return self.obj._slice(indexer, axis=axis)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/datetimes.py:809, in DatetimeIndex.slice_indexer(self, start, end, step, kind)
    801 # GH#33146 if start and end are combinations of str and None and Index is not
    802 # monotonic, we can not use Index.slice_indexer because it does not honor the
    803 # actual elements, is only searching for start and end
    804 if (
    805     check_str_or_none(start)
    806     or check_str_or_none(end)
    807     or self.is_monotonic_increasing
    808 ):
--> 809     return Index.slice_indexer(self, start, end, step, kind=kind)
    811 mask = np.array(True)
    812 deprecation_mask = np.array(True)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind)
   6559 """
   6560 Compute the slice indexer for input labels and step.
   6561 
   (...)
   6598 slice(1, 3, None)
   6599 """
   6600 self._deprecated_arg(kind, "kind", "slice_indexer")
-> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
   6604 # return a slice
   6605 if not is_scalar(start_slice):

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/base.py:6810, in Index.slice_locs(self, start, end, step, kind)
   6808 start_slice = None
   6809 if start is not None:
-> 6810     start_slice = self.get_slice_bound(start, "left")
   6811 if start_slice is None:
   6812     start_slice = 0

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/base.py:6719, in Index.get_slice_bound(self, label, side, kind)
   6715 original_label = label
   6717 # For datetime indices label may be a string that has to be converted
   6718 # to datetime boundary according to its resolution.
-> 6719 label = self._maybe_cast_slice_bound(label, side)
   6721 # we need to look up the label
   6722 try:

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/datetimes.py:767, in DatetimeIndex._maybe_cast_slice_bound(self, label, side, kind)
    762 if isinstance(label, date) and not isinstance(label, datetime):
    763     # Pandas supports slicing with dates, treated as datetimes at midnight.
    764     # https://github.com/pandas-dev/pandas/issues/31501
    765     label = Timestamp(label).to_pydatetime()
--> 767 label = super()._maybe_cast_slice_bound(label, side, kind=kind)
    768 self._deprecate_mismatched_indexing(label)
    769 return self._maybe_cast_for_get_loc(label)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/datetimelike.py:320, in DatetimeIndexOpsMixin._maybe_cast_slice_bound(self, label, side, kind)
    318     return lower if side == "left" else upper
    319 elif not isinstance(label, self._data._recognized_scalars):
--> 320     raise self._invalid_indexer("slice", label)
    322 return label

TypeError: cannot do slice indexing on DatetimeIndex with these indexers [2] of type int

String-like slice bounds that can be converted to the type of the index lead to natural slicing:

dfl.loc['20130102':'20130104']

	A	B	C	D
2013-01-02	0.588777	1.250445	-0.276583	-0.121396
2013-01-03	0.967615	-1.820374	0.565907	-0.893642
2013-01-04	0.732792	0.428791	-1.568156	-0.182880

from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">

<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:730px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%20%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dpd.date_range%28'20130101',%20periods%3D5%29%29%0Adfl%0Adfl.loc%5B'20130102'%3A'20130104'%5D&d=2023-07-13&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

Pandas will raise a KeyError if indexing with a list with missing labels.

Pandas provides a suite of methods in order to have purely label-based indexing. This is a strict inclusion-based protocol. Every label asked for must be in the index, or a KeyError will be raised. When slicing, both the start bound AND the stop bound are included, if present in the index. Integers are valid labels, but they refer to the label and not the position.

The .loc attribute is the primary access method. The following are valid inputs:
A single label, e.g. 5 or 'a' (Note that 5 is interpreted as a label of the index. This use is not an integer position along the index.).
A list or array of labels ['a', 'b', 'c'].
A slice object with labels 'a':'f' (Note that contrary to usual Python slices, both the start and the stop are included, when present in the index!).
A boolean array.
A callable.

s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
s1
s1.loc['c':]

c    1.519834
d   -0.629201
e   -0.726953
f    1.290481
dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">

<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:730px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'c'%3A%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

s1.loc['b']

0.3983111784912009

from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'b'%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

Note that the setting works as well:

s1.loc['c':] = 0
s1

a    0.229543
b    0.398311
c    0.000000
d    0.000000
e    0.000000
f    0.000000
dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'c'%3A%5D%20%3D%200%0As1&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list('abcdef'),
                   columns=list('ABCD'))
df1
df1.loc[['a', 'b', 'd'], :]

	A	B	C	D
a	1.582003	0.987351	-0.522115	0.683331
b	-0.020445	-1.267777	-0.030918	1.174362
d	0.242241	0.194438	-0.405266	0.569506

from IPython.display import HTML

display(
    HTML(
        """



<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B%5B'a',%20'b',%20'd'%5D,%20%3A%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

Accessing via label slices:

df1.loc['d':, 'A':'C']

	A	B	C
d	0.242241	0.194438	-0.405266
e	0.601422	1.325717	1.402254
f	0.788776	1.725383	0.271514

For getting a cross-section using a label (equivalent to df.xs('a')):

df1.loc['a']

A    1.582003
B    0.987351
C   -0.522115
D    0.683331
Name: a, dtype: float64

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:690px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a'%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

For getting values with a boolean array:

df1.loc['a'] > 0

A     True
B     True
C    False
D     True
Name: a, dtype: bool

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:650px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a'%5D%20%3E%200&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

df1.loc[:, df1.loc['a'] > 0]

	A	B	D
a	1.582003	0.987351	0.683331
b	-0.020445	-1.267777	1.174362
c	0.945725	-0.525497	-0.085318
d	0.242241	0.194438	0.569506
e	0.601422	1.325717	-0.200271
f	0.788776	1.725383	-0.974910

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B%3A,%20df1.loc%5B'a'%5D%20%3E%200%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

NA values in a boolean array propagate as False:

mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean")
mask

<BooleanArray>
[True, False, True, False, <NA>, False]
Length: 6, dtype: boolean

df1[mask]

	A	B	C	D
a	1.582003	0.987351	-0.522115	0.683331
c	0.945725	-0.525497	-0.891158	-0.085318

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Amask%20%3D%20pd.array%28%5BTrue,%20False,%20True,%20False,%20pd.NA,%20False%5D,%20dtype%3D%22boolean%22%29%0Adf1%5Bmask%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

For getting a value explicitly:

df1.loc['a', 'A'] # this is also equivalent to ``df1.at['a','A']``

1.5820025794487522

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a',%20'A'%5D%20&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)

Let's visualize it! 🎥

5.4.2.3. Slicing with labels#

When using .loc with slices, if both the start and the stop labels are present in the index, then elements located between the two (including them) are returned:

s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4])
s.loc[3:5]

3    b
2    c
5    d
dtype: object

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:580px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.loc%5B3%3A5%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>




"""
    )
)

Let's visualize it! 🎥

If at least one of the two is absent, but the index is sorted, and can be compared against start and stop labels, then slicing will still work as expected, by selecting labels which rank between the two:

s.sort_index()

0    a
2    c
3    b
4    e
5    d
dtype: object

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.sort_index%28%29&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)

Let's visualize it! 🎥

s.sort_index().loc[1:6]

2    c
3    b
4    e
5    d
dtype: object

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:820px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.sort_index%28%29.loc%5B1%3A6%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)

Let's visualize it! 🎥

However, if at least one of the two is absent and the index is not sorted, an error will be raised (since doing otherwise would be computationally expensive, as well as potentially ambiguous for mixed-type indexes). For instance, in the above example, s.loc[1:6] would raise KeyError.

s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2])
s.loc[3:5]

3    b
2    c
5    d
dtype: object

from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:650px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcdef'%29,%20index%3D%5B0,%203,%202,%205,%204,%202%5D%29%0As.loc%5B3%3A5%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

Also, if the index has duplicate labels and either the start or the stop label is duplicated, an error will be raised. For instance, in the above example, s.loc[2:5] would raise a KeyError.

5.4.2.4. Selection by position#

Whether a copy or a reference is returned for a setting operation may depend on the context. This is sometimes called chained assignment and should be avoided.

Pandas provides a suite of methods in order to get purely integer-based indexing. The semantics closely follow Python and NumPy slicing. Indexing is 0-based. When slicing, the start bound is included, while the upper bound is excluded. Trying to use a non-integer, even a valid label, will raise an IndexError.

The .iloc attribute is the primary access method. The following are valid inputs:

An integer e.g. 5.
A list or array of integers [4, 3, 0].
A slice object with ints 1:7.
A boolean array.
A callable.

s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2)))
s1
s1.iloc[:3]

0   -0.805919
2    0.428108
4    1.812731
dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

s1.iloc[3]

1.9849031639081407

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

Note that setting works as well:

s1.iloc[:3] = 0
s1

0    0.000000
2    0.000000
4    0.000000
6    1.984903
8    0.475023
dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:620px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D%20%3D%200%0As1&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

With a DataFrame, select via integer slicing:

df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list(range(0, 12, 2)),
                   columns=list(range(0, 8, 2)))
df1
df1.iloc[:3]

	0	2	4	6
0	-0.527504	0.054396	0.112646	-0.082599
2	0.084774	-0.332679	-0.528022	-1.085305
4	-1.828948	-0.163621	-0.411630	-0.927196

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

df1.iloc[1:5, 2:4]

	4	6
2	-0.528022	-1.085305
4	-0.411630	-0.927196
6	1.496097	-0.207705
8	-0.239940	-0.255905

Select via integer list:

df1.iloc[[1, 3, 5], [1, 3]]

	2	6
2	-0.332679	-1.085305
6	-1.243526	-0.207705
10	0.019804	0.761351

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%3A5,%202%3A4%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

df1.iloc[1:3, :]

	0	2	4	6
2	0.084774	-0.332679	-0.528022	-1.085305
4	-1.828948	-0.163621	-0.411630	-0.927196

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%3A3,%20%3A%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)

Let's visualize it! 🎥

df1.iloc[:, 1:3]

	2	4
0	0.054396	0.112646
2	-0.332679	-0.528022
4	-0.163621	-0.411630
6	-1.243526	1.496097
8	-1.219965	-0.239940
10	0.019804	-0.455306

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%3A,%201%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

df1.iloc[1, 1] # this is also equivalent to ``df1.iat[1,1]``

-0.3326786237713185

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1,%201%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

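For getting a cross-section using an integer position (equivalent to df.xs(1) only when the index is the default 0, 1, 2, …; with the index used here, position 1 is the row labelled 2):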

df1.iloc[1]

0    0.084774
2   -0.332679
4   -0.528022
6   -1.085305
Name: 2, dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

Out-of-range slice indexes are handled gracefully just as in Python/NumPy.

x = list('abcdef') # these are allowed in Python/NumPy.
x

['a', 'b', 'c', 'd', 'e', 'f']

x[4:10]

['e', 'f']

x[8:10]

[]

s = pd.Series(x)
s

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

s.iloc[4:10]

4    e
5    f
dtype: object

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Ax%20%3D%20list%28'abcdef'%29%20%0As%20%3D%20pd.Series%28x%29%0As.iloc%5B4%3A10%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

s.iloc[8:10]

Series([], dtype: object)

Note that using slices that go out of bounds can result in an empty axis (e.g. an empty DataFrame being returned).

dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))

dfl.iloc[:, 2:3]


0
1
2
3
4

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl%0Adfl.iloc%5B%3A,%202%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

dfl.iloc[:, 1:3]

	B
0	-1.035027
1	-1.430560
2	0.642986
3	0.487318
4	-2.004848

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl.iloc%5B%3A,%201%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

dfl.iloc[4:6]

	A	B
4	-0.323008	-2.004848

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl.iloc%5B4%3A6%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

A single indexer that is out of bounds will raise an IndexError. A list of indexers where any element is out of bounds will raise an IndexError.

dfl.iloc[[4, 5, 6]]

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1587, in _iLocIndexer._get_list_axis(self, key, axis)
   1586 try:
-> 1587     return self.obj._take_with_is_copy(key, axis=axis)
   1588 except IndexError as err:
   1589     # re-raise with different error message

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/generic.py:3902, in NDFrame._take_with_is_copy(self, indices, axis)
   3895 """
   3896 Internal version of the `take` method that sets the `_is_copy`
   3897 attribute to keep track of the parent dataframe (using in indexing
   (...)
   3900 See the docstring of `take` for full explanation of the parameters.
   3901 """
-> 3902 result = self._take(indices=indices, axis=axis)
   3903 # Maybe set copy if we didn't actually change the index.

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/generic.py:3886, in NDFrame._take(self, indices, axis, convert_indices)
   3884 self._consolidate_inplace()
-> 3886 new_data = self._mgr.take(
   3887     indices,
   3888     axis=self._get_block_manager_axis(axis),
   3889     verify=True,
   3890     convert_indices=convert_indices,
   3891 )
   3892 return self._constructor(new_data).__finalize__(self, method="take")

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/internals/managers.py:977, in BaseBlockManager.take(self, indexer, axis, verify, convert_indices)
    976 if convert_indices:
--> 977     indexer = maybe_convert_indices(indexer, n, verify=verify)
    979 new_labels = self.axes[axis].take(indexer)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexers/utils.py:286, in maybe_convert_indices(indices, n, verify)
    285     if mask.any():
--> 286         raise IndexError("indices are out-of-bounds")
    287 return indices

IndexError: indices are out-of-bounds

The above exception was the direct cause of the following exception:

IndexError                                Traceback (most recent call last)
Cell In[67], line 1
----> 1 dfl.iloc[[4, 5, 6]]

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
   1070 axis = self.axis or 0
   1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1616, in _iLocIndexer._getitem_axis(self, key, axis)
   1614 # a list of integers
   1615 elif is_list_like_indexer(key):
-> 1616     return self._get_list_axis(key, axis=axis)
   1618 # a single integer
   1619 else:
   1620     key = item_from_zerodim(key)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1590, in _iLocIndexer._get_list_axis(self, key, axis)
   1587     return self.obj._take_with_is_copy(key, axis=axis)
   1588 except IndexError as err:
   1589     # re-raise with different error message
-> 1590     raise IndexError("positional indexers are out-of-bounds") from err

IndexError: positional indexers are out-of-bounds

dfl.iloc[:, 4]

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[68], line 1
----> 1 dfl.iloc[:, 4]

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1067, in _LocationIndexer.__getitem__(self, key)
   1065     if self._is_scalar_access(key):
   1066         return self.obj._get_value(*key, takeable=self._takeable)
-> 1067     return self._getitem_tuple(key)
   1068 else:
   1069     # we by definition only have the 0th axis
   1070     axis = self.axis or 0

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1563, in _iLocIndexer._getitem_tuple(self, tup)
   1561 def _getitem_tuple(self, tup: tuple):
-> 1563     tup = self._validate_tuple_indexer(tup)
   1564     with suppress(IndexingError):
   1565         return self._getitem_lowerdim(tup)

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:873, in _LocationIndexer._validate_tuple_indexer(self, key)
    871 for i, k in enumerate(key):
    872     try:
--> 873         self._validate_key(k, i)
    874     except ValueError as err:
    875         raise ValueError(
    876             "Location based indexing can only have "
    877             f"[{self._valid_types}] types"
    878         ) from err

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1466, in _iLocIndexer._validate_key(self, key, axis)
   1464     return
   1465 elif is_integer(key):
-> 1466     self._validate_integer(key, axis)
   1467 elif isinstance(key, tuple):
   1468     # a tuple should already have been caught by this point
   1469     # so don't treat a tuple as a valid indexer
   1470     raise IndexingError("Too many indexers")

File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1557, in _iLocIndexer._validate_integer(self, key, axis)
   1555 len_axis = len(self.obj._get_axis(axis))
   1556 if key >= len_axis or key < -len_axis:
-> 1557     raise IndexError("single positional indexer is out-of-bounds")

IndexError: single positional indexer is out-of-bounds

5.4.2.5. Selection by callable#

.loc, .iloc, and also [] indexing can accept a callable as indexer. The callable must be a function with one argument (the calling Series or DataFrame) that returns valid output for indexing.

df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list('abcdef'),
                   columns=list('ABCD'))
df1
df1.loc[lambda df: df['A'] > 0, :]

	A	B	C	D
b	1.601640	0.785938	0.559648	1.609879
c	0.298113	-0.420427	0.918680	0.436950
d	0.961635	-0.252276	1.326258	-1.436899
e	0.192715	0.390917	-0.232632	0.392365

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5Blambda%20df%3A%20df%5B'A'%5D%20%3E%200,%20%3A%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)

Let's visualize it! 🎥

df1.loc[:, lambda df: ['A', 'B']]

	A	B
a	-0.553925	0.516329
b	1.601640	0.785938
c	0.298113	-0.420427
d	0.961635	-0.252276
e	0.192715	0.390917
f	-0.963618	0.796912

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1.loc%5B%3A,%20lambda%20df%3A%20%5B'A',%20'B'%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)

Let's visualize it! 🎥

df1.iloc[:, lambda df: [0, 1]]

	A	B
a	-0.553925	0.516329
b	1.601640	0.785938
c	0.298113	-0.420427
d	0.961635	-0.252276
e	0.192715	0.390917
f	-0.963618	0.796912

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1.iloc%5B%3A,%20lambda%20df%3A%20%5B0,%201%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

df1[lambda df: df.columns[0]]

a   -0.553925
b    1.601640
c    0.298113
d    0.961635
e    0.192715
f   -0.963618
Name: A, dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:650px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%5Blambda%20df%3A%20df.columns%5B0%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

You can use callable indexing in Series.

df1['A'].loc[lambda s: s > 0]

b    1.601640
c    0.298113
d    0.961635
e    0.192715
Name: A, dtype: float64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:920px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%5B'A'%5D.loc%5Blambda%20s%3A%20s%20%3E%200%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

5.4.2.5.1. Combining positional and label-based indexing#

If you wish to get the 0th and the 2nd elements from the index in the 'A' column, you can do:

dfd = pd.DataFrame({'A': [1, 2, 3],
                    'B': [4, 5, 6]},
                   index=list('abc'))
dfd
dfd.loc[dfd.index[[0, 2]], 'A']

a    1
c    3
Name: A, dtype: int64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:550px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.loc%5Bdfd.index%5B%5B0,%202%5D%5D,%20'A'%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)

Let's visualize it! 🎥

This can also be expressed using .iloc, by explicitly getting locations on the indexers, and using positional indexing to select things.

dfd.iloc[[0, 2], dfd.columns.get_loc('A')]

a    1
c    3
Name: A, dtype: int64

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:550px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.iloc%5B%5B0,%202%5D,%20dfd.columns.get_loc%28'A'%29%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)

Let's visualize it! 🎥

To get multiple indexers at once, use .get_indexer:

dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])]

	A	B
a	1	4
c	3	6

from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:550px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.iloc%5B%5B0,%202%5D,%20dfd.columns.get_indexer%28%5B'A',%20'B'%5D%29%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)

Let's visualize it! 🎥

5.4.2.6. Acknowledgments#

Thanks to the Pandas user guide, which contributes the majority of the content in this chapter.

Ocademy Open Machine Learning Book

Data Selection

Contents

5.4.2. Data Selection#

5.4.2.1. Overview#

5.4.2.2. Selection by label#

5.4.2.3. Slicing with labels#

5.4.2.4. Selection by position#

5.4.2.5. Selection by callable#

5.4.2.5.1. Combining positional and label-based indexing#

5.4.2.6. Acknowledgments#