In [None]:
# Install the necessary dependencies

import os
import sys
import numpy as np
import pandas as pd
!{sys.executable} -m pip install --quiet jupyterlab_myst ipython

---
license:
    code: MIT
    content: CC-BY-4.0
github: https://github.com/ocademy-ai/machine-learning
venue: By Ocademy
open_access: true
bibliography:
  - https://raw.githubusercontent.com/ocademy-ai/machine-learning/main/open-machine-learning-jupyter-book/references.bib
---

# Data Selection 

## Overview

In this section, we'll focus on how to slice, dice, and generally get and set subsets of Pandas objects.

## Selection by label

Whether a copy or a reference is returned for a setting operation may depend on the context. This is sometimes called `chained assignment` and should be avoided.

`.loc` is strict when you present slicers that are not compatible (or convertible) with the index type, for example when using integers with a `DatetimeIndex`. These will raise a `TypeError`.

In [2]:
dfl = pd.DataFrame(np.random.randn(5, 4),
                   columns=list('ABCD'),
                   index=pd.date_range('20130101', periods=5))

In [None]:
# Integer slicers are incompatible with a DatetimeIndex, so `.loc` rejects them.
# The error is the point of this cell: catch it and show the message so that
# "Restart Kernel -> Run All" can still continue past this demonstration.
try:
    dfl.loc[2:3]
except TypeError as err:
    print(f"TypeError: {err}")

String-like values in a slice can be converted to the type of the index, which leads to natural slicing.

In [None]:
dfl.loc['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,1.481584,-1.691289,-0.086724,-0.393754
2013-01-03,0.476774,0.60545,-0.091083,-1.410096
2013-01-04,0.035828,-0.095133,1.377407,0.49522


In [None]:
# Embed the Pandas Tutor visualization of `dfl.loc['20130102':'20130104']`.
# `display` is imported explicitly so the cell does not rely on the name that
# IPython injects into the interactive namespace.
from IPython.display import HTML, display

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">

<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:730px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%20%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dpd.date_range%28'20130101',%20periods%3D5%29%29%0Adfl%0Adfl.loc%5B'20130102'%3A'20130104'%5D&d=2023-07-13&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


Pandas will raise a `KeyError` if indexing with a list with missing labels.

Pandas provides a suite of methods in order to have **purely label-based indexing**. This is a strict inclusion-based protocol. Every label asked for must be in the index, or a `KeyError` will be raised. When slicing, both the start bound **AND** the stop bound are included, if present in the index. Integers are valid labels, but they refer to the label **and not the position**.

The `.loc` attribute is the primary access method. The following are valid inputs:

- A single label, e.g. `5` or `'a'` (Note that `5` is interpreted as a label of the index. This use is not an integer position along the index.).

- A list or array of labels `['a', 'b', 'c']`.

- A slice object with labels `'a':'f'` (Note that contrary to usual Python slices, both the start and the stop are included, when present in the index!).

- A boolean array.

- A `callable`.

In [None]:
s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
s1
s1.loc['c':]

c    0.152695
d   -0.615396
e    0.203773
f    1.487611
dtype: float64

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">

<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:730px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'c'%3A%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


In [None]:
s1.loc['b']

1.8417073794042274

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'b'%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


Note that setting works as well:

In [None]:
s1.loc['c':] = 0
s1

a   -0.221293
b    1.841707
c    0.000000
d    0.000000
e    0.000000
f    0.000000
dtype: float64

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'c'%3A%5D%20%3D%200%0As1&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


In [None]:
df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list('abcdef'),
                   columns=list('ABCD'))
df1
df1.loc[['a', 'b', 'd'], :]

Unnamed: 0,A,B,C,D
a,-0.829219,1.185075,0.093787,-0.44214
b,-0.473605,-0.317633,-0.047595,-1.409355
d,-0.721064,1.436217,-2.073527,0.452794


In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B%5B'a',%20'b',%20'd'%5D,%20%3A%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


Accessing via label slices:

In [None]:
df1.loc['d':, 'A':'C']

Unnamed: 0,A,B,C
d,-0.721064,1.436217,-2.073527
e,0.400573,1.644355,-0.021278
f,-0.282458,-0.657392,-0.091122


For getting a cross-section using a label (equivalent to `df.xs('a')`):

In [None]:
df1.loc['a']

A   -0.829219
B    1.185075
C    0.093787
D   -0.442140
Name: a, dtype: float64

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:690px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a'%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


For getting values with a boolean array:

In [None]:
df1.loc['a'] > 0

A    False
B     True
C     True
D    False
Name: a, dtype: bool

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:650px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a'%5D%20%3E%200&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


In [None]:
df1.loc[:, df1.loc['a'] > 0]

Unnamed: 0,B,C
a,1.185075,0.093787
b,-0.317633,-0.047595
c,0.205571,-1.191746
d,1.436217,-2.073527
e,1.644355,-0.021278
f,-0.657392,-0.091122


In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B%3A,%20df1.loc%5B'a'%5D%20%3E%200%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


NA values in a boolean array propagate as `False`:

In [None]:
mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean")
mask

<BooleanArray>
[True, False, True, False, <NA>, False]
Length: 6, dtype: boolean

In [None]:
df1[mask]

Unnamed: 0,A,B,C,D
a,-0.829219,1.185075,0.093787,-0.44214
c,1.195465,0.205571,-1.191746,-0.836474


In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Amask%20%3D%20pd.array%28%5BTrue,%20False,%20True,%20False,%20pd.NA,%20False%5D,%20dtype%3D%22boolean%22%29%0Adf1%5Bmask%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


For getting a value explicitly:

In [None]:
df1.loc['a', 'A'] # this is also equivalent to ``df1.at['a','A']``

-0.8292186214151204

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a',%20'A'%5D%20&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)


## Slicing with labels

When using `.loc` with slices, if both the start and the stop labels are present in the index, then elements located between the two (including them) are returned:

In [None]:
s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4])
s.loc[3:5]

3    b
2    c
5    d
dtype: object

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:580px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.loc%5B3%3A5%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>




"""
    )
)


If at least one of the two is absent, but the index is sorted, and can be compared against start and stop labels, then slicing will still work as expected, by selecting labels which rank between the two:

In [None]:
s.sort_index()

0    a
2    c
3    b
4    e
5    d
dtype: object

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.sort_index%28%29&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)


In [None]:
s.sort_index().loc[1:6]

2    c
3    b
4    e
5    d
dtype: object

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:820px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.sort_index%28%29.loc%5B1%3A6%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)


However, if at least one of the two is absent and the index is not sorted, an error will be raised (since doing otherwise would be computationally expensive, as well as potentially ambiguous for mixed-type indexes). For instance, in the above example, `s.loc[1:6]` would raise `KeyError`.

In [None]:
s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2])
s.loc[3:5]

3    b
2    c
5    d
dtype: object

In [None]:
from IPython.display import HTML

display(
    HTML(
        """



<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:650px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcdef'%29,%20index%3D%5B0,%203,%202,%205,%204,%202%5D%29%0As.loc%5B3%3A5%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)



Also, if the index has duplicate labels and either the start or the stop label is duplicated, an error will be raised. For instance, in the above example, `s.loc[2:5]` would raise a `KeyError`.

## Selection by position

Whether a copy or a reference is returned for a setting operation may depend on the context. This is sometimes called `chained assignment` and should be avoided.

Pandas provides a suite of methods in order to get purely integer-based indexing. The semantics closely follow Python and NumPy slicing. Indexing is 0-based. When slicing, the start bound is included, while the upper bound is excluded. Trying to use a non-integer, even a valid label, will raise an `IndexError`.

The `.iloc` attribute is the primary access method. The following are valid inputs:

- An integer e.g. `5`.

- A list or array of integers `[4, 3, 0]`.

- A slice object with ints `1:7`.

- A boolean array.

- A `callable`.

In [None]:
s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2)))
s1
s1.iloc[:3]

0   -0.124201
2    1.294954
4   -0.793453
dtype: float64

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


In [None]:
s1.iloc[3]

-0.9449480012698949

In [None]:
# Embed the Pandas Tutor visualization of the scalar lookup `s1.iloc[3]`
# shown in the previous cell (the embedded code must match that cell, not
# the earlier `s1.iloc[:3]` slice).
from IPython.display import HTML, display

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


Note that setting works as well:

In [None]:
s1.iloc[:3] = 0
s1

0    0.000000
2    0.000000
4    0.000000
6   -0.944948
8    0.385288
dtype: float64

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:620px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D%20%3D%200%0As1&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


With a DataFrame, select via integer slicing:

In [None]:
df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list(range(0, 12, 2)),
                   columns=list(range(0, 8, 2)))
df1
df1.iloc[:3]

Unnamed: 0,0,2,4,6
0,0.176708,-0.734049,-0.874521,0.013537
2,1.809582,0.802905,-0.563674,-0.466175
4,0.813012,-0.131666,1.373226,-0.56818


In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


In [None]:
df1.iloc[1:5, 2:4]

Unnamed: 0,4,6
2,-0.563674,-0.466175
4,1.373226,-0.56818
6,-0.467455,1.028096
8,0.156377,-0.368254


Select via integer list:

In [None]:
df1.iloc[[1, 3, 5], [1, 3]]

Unnamed: 0,2,6
2,0.802905,-0.466175
6,-1.760254,1.028096
10,-1.020584,1.98755


In [None]:
# Embed the Pandas Tutor visualization of the integer-list selection
# `df1.iloc[[1, 3, 5], [1, 3]]` shown in the previous cell, so the embedded
# code matches the example it sits under.
from IPython.display import HTML, display

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%5B1,%203,%205%5D,%20%5B1,%203%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


In [None]:
df1.iloc[1:3, :]

Unnamed: 0,0,2,4,6
2,1.809582,0.802905,-0.563674,-0.466175
4,0.813012,-0.131666,1.373226,-0.56818


In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%3A3,%20%3A%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)


In [None]:
df1.iloc[:, 1:3]

Unnamed: 0,2,4
0,-0.734049,-0.874521
2,0.802905,-0.563674
4,-0.131666,1.373226
6,-1.760254,-0.467455
8,-1.629683,0.156377
10,-1.020584,-0.194566


In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%3A,%201%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


In [None]:
df1.iloc[1, 1] # this is also equivalent to ``df1.iat[1,1]``

0.8029050594558378

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:680px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1,%201%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)



For getting a cross-section using an integer position (here equivalent to `df1.xs(2)`, because `xs` is label-based and position `1` holds the label `2`):

In [None]:
df1.iloc[1]

0    1.809582
2    0.802905
4   -0.563674
6   -0.466175
Name: 2, dtype: float64

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


Out-of-range slice indexes are handled gracefully just as in Python/NumPy.

In [None]:
x = list('abcdef') # these are allowed in Python/NumPy.
x

['a', 'b', 'c', 'd', 'e', 'f']

In [None]:
x[4:10]

['e', 'f']

In [None]:
x[8:10]

[]

In [None]:
s = pd.Series(x)
s

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

In [None]:
s.iloc[4:10]

4    e
5    f
dtype: object

In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Ax%20%3D%20list%28'abcdef'%29%20%0As%20%3D%20pd.Series%28x%29%0As.iloc%5B4%3A10%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


In [None]:
s.iloc[8:10]

Series([], dtype: object)

Note that using slices that go out of bounds can result in an empty axis (e.g. an empty DataFrame being returned).

In [None]:
dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))

In [None]:
dfl.iloc[:, 2:3]

0
1
2
3
4


In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl%0Adfl.iloc%5B%3A,%202%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


In [None]:
dfl.iloc[:, 1:3]

Unnamed: 0,B
0,-0.491934
1,-0.758957
2,1.793034
3,-0.330006
4,1.362746


In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:630px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl.iloc%5B%3A,%201%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)


In [None]:
dfl.iloc[4:6]

Unnamed: 0,A,B
4,0.238833,1.362746


In [None]:
from IPython.display import HTML

display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! ðŸŽ¥</p>
    <div class="pandastutor inner" style="height:600px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl.iloc%5B4%3A6%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)



A single indexer that is out of bounds will raise an `IndexError`. A list of indexers where any element is out of bounds will raise an `IndexError`.

In [None]:
# Positions 5 and 6 do not exist in the 5-row frame, so the list indexer is
# rejected. Catch the error and show its message so the notebook keeps
# running past this demonstration.
try:
    dfl.iloc[[4, 5, 6]]
except IndexError as err:
    print(f"IndexError: {err}")

In [None]:
# Column position 4 does not exist in the 2-column frame, so the single
# indexer is rejected. Catch the error and show its message so the notebook
# keeps running past this demonstration.
try:
    dfl.iloc[:, 4]
except IndexError as err:
    print(f"IndexError: {err}")

## Selection by callable

`.loc`, `.iloc`, and also `[]` indexing can accept a `callable` as indexer. The `callable` must be a function with one argument (the calling Series or DataFrame) that returns valid output for indexing.

In [None]:
df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list('abcdef'),
                   columns=list('ABCD'))
df1
df1.loc[lambda df: df['A'] > 0, :]

Unnamed: 0,A,B,C,D
b,0.206097,0.325348,-0.811762,0.696057
c,1.369032,1.861469,0.35549,0.416873
d,0.028375,0.855487,0.998617,-1.899382


In [None]:
# Embed the Pandas Tutor step-by-step visualisation of the callable row
# selection on `df1` in an iframe. `display` is imported explicitly instead of
# relying on the name that IPython injects into the kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5Blambda%20df%3A%20df%5B'A'%5D%20%3E%200,%20%3A%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>
"""
    )
)


In [None]:
# The callable ignores its argument and simply returns the column labels to
# keep; every row is selected.
df1.loc[:, lambda df: ['A', 'B']]

Unnamed: 0,A,B
a,-0.889154,-0.228248
b,0.206097,0.325348
c,1.369032,1.861469
d,0.028375,0.855487
e,-0.344703,1.783202
f,-0.660587,0.034734


In [None]:
# Embed the Pandas Tutor step-by-step visualisation of the callable column
# selection with `.loc` in an iframe. `display` is imported explicitly instead
# of relying on the name that IPython injects into the kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1.loc%5B%3A,%20lambda%20df%3A%20%5B'A',%20'B'%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)


In [None]:
# Positional counterpart of the label-based selection: the callable returns
# integer column positions 0 and 1; every row is selected.
df1.iloc[:, lambda df: [0, 1]]

Unnamed: 0,A,B
a,-0.889154,-0.228248
b,0.206097,0.325348
c,1.369032,1.861469
d,0.028375,0.855487
e,-0.344703,1.783202
f,-0.660587,0.034734


In [None]:
# Embed the Pandas Tutor step-by-step visualisation of the callable column
# selection with `.iloc` in an iframe. `display` is imported explicitly instead
# of relying on the name that IPython injects into the kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:700px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1.iloc%5B%3A,%20lambda%20df%3A%20%5B0,%201%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


In [None]:
# With plain [] indexing the callable's return value is used as the key;
# here it returns the first column label, so that column comes back as a Series.
df1[lambda df: df.columns[0]]

a   -0.889154
b    0.206097
c    1.369032
d    0.028375
e   -0.344703
f   -0.660587
Name: A, dtype: float64

In [None]:
# Embed the Pandas Tutor step-by-step visualisation of callable `[]` indexing
# on `df1` in an iframe. `display` is imported explicitly instead of relying on
# the name that IPython injects into the kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:650px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%5Blambda%20df%3A%20df.columns%5B0%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)



Callable indexing also works on a `Series`.

In [None]:
# The callable receives the Series itself and returns a boolean mask, so only
# the positive entries of column 'A' are kept.
df1['A'].loc[lambda s: s > 0]

b    0.206097
c    1.369032
d    0.028375
Name: A, dtype: float64

In [None]:
# Embed the Pandas Tutor step-by-step visualisation of callable indexing on
# the Series `df1['A']` in an iframe. `display` is imported explicitly instead
# of relying on the name that IPython injects into the kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:920px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%5B'A'%5D.loc%5Blambda%20s%3A%20s%20%3E%200%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)



### Combining positional and label-based indexing

If you wish to get the 0th and the 2nd elements from the index in the `'A'` column, you can do:

In [None]:
# Small integer frame with row labels 'a'-'c' and columns 'A' and 'B'.
dfd = pd.DataFrame({'A': [1, 2, 3],
                    'B': [4, 5, 6]},
                   index=list('abc'))
# A bare `dfd` used to sit here; only the last expression of a cell is
# rendered, so it displayed nothing and has been removed.
# dfd.index[[0, 2]] turns positions 0 and 2 into their row labels, which .loc
# then combines with the column label 'A'.
dfd.loc[dfd.index[[0, 2]], 'A']

a    1
c    3
Name: A, dtype: int64

In [None]:
# Embed the Pandas Tutor step-by-step visualisation of the combined
# positional/label selection on `dfd` in an iframe. `display` is imported
# explicitly instead of relying on the name that IPython injects into the
# kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:550px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.loc%5Bdfd.index%5B%5B0,%202%5D%5D,%20'A'%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>

"""
    )
)



This can also be expressed using `.iloc`, by explicitly getting locations on the indexers, and using positional indexing to select things.

In [None]:
# get_loc translates the column label 'A' into its integer position so the
# whole selection can be expressed positionally with .iloc.
dfd.iloc[[0, 2], dfd.columns.get_loc('A')]

a    1
c    3
Name: A, dtype: int64

In [None]:
# Embed the Pandas Tutor step-by-step visualisation of the `.iloc` +
# `get_loc` selection on `dfd` in an iframe. `display` is imported explicitly
# instead of relying on the name that IPython injects into the kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:550px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.iloc%5B%5B0,%202%5D,%20dfd.columns.get_loc%28'A'%29%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>



"""
    )
)



To get multiple indexers, use `.get_indexer`:

In [None]:
# get_indexer translates several column labels at once into their integer
# positions, which .iloc then uses alongside row positions 0 and 2.
dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])]

Unnamed: 0,A,B
a,1,4
c,3,6


In [None]:
# Embed the Pandas Tutor step-by-step visualisation of the `.iloc` +
# `get_indexer` selection on `dfd` in an iframe. `display` is imported
# explicitly instead of relying on the name that IPython injects into the
# kernel namespace.
from IPython.display import HTML, display

# The title emoji is written as the numeric character reference &#127909;
# (movie camera) so it renders correctly regardless of the file's encoding.
display(
    HTML(
        """


<div class='full-width docutils' >
  <div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
    <p class="admonition-title pandastutor">Let's visualize it! &#127909;</p>
    <div class="pandastutor inner" style="height:550px;">
      <iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.iloc%5B%5B0,%202%5D,%20dfd.columns.get_indexer%28%5B'A',%20'B'%5D%29%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
    </div>
  </div>
</div>


"""
    )
)


## Acknowledgments

Thanks to the [Pandas user guide](https://pandas.pydata.org/docs/user_guide/index.html), which contributes the majority of the content in this chapter.