#WEEK 1: initial exploration of the APOGEE catalogue
#import some usual packages
import matplotlib.pyplot as plt
from astropy.table import Table
import pandas as pd
import numpy as np
import os
from jupyterthemes import jtplot
jtplot.reset()
def wrap_at(X, angle=180.):
return np.mod(X+angle,360.) - (360. - angle)
#changing of folder to acced to the data easier
os.chdir('/home/fgran/Dropbox/aa_Nice/Catalogues/')
#open the apogee dr17 file with astropy and converted to pandas data structure (easier to work with)
apogee = Table.read('allStarLite-dr17-synspec_rev1.fits')
names_to_keep = [name for name in apogee.colnames if len(apogee[name].shape) <= 1]
apogee = apogee[names_to_keep].to_pandas()
WARNING: hdu= was not specified but multiple tables are present, reading in first available table (hdu=1) [astropy.io.fits.connect]
#remove nans from the table
apogee.dropna(subset=['TEFF', 'LOGG', 'FE_H', 'ALPHA_M'], inplace=True)
#data within the apogee file
apogee.head()
APOGEE_ID | TELESCOPE | FIELD | ALT_ID | RA | DEC | GLON | GLAT | J | J_ERR | ... | NI_FE_ERR | NI_FE_FLAG | CU_FE | CU_FE_SPEC | CU_FE_ERR | CU_FE_FLAG | CE_FE | CE_FE_SPEC | CE_FE_ERR | CE_FE_FLAG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | b'VESTA' | b'apo1m' | b'calibration' | b' ' | NaN | NaN | 292.219131 | -30.602919 | 99.999001 | 0.000 | ... | 0.010266 | 0 | NaN | NaN | 0.059403 | 2 | NaN | NaN | NaN | 64 |
1 | b'2M00000002+7417074' | b'apo25m' | b'120+12' | b'none' | 0.000103 | 74.285408 | 119.401807 | 11.767414 | 8.597000 | 0.039 | ... | 0.010609 | 0 | NaN | NaN | 0.001221 | 2 | NaN | NaN | NaN | 64 |
2 | b'2M00000019-1924498' | b'apo25m' | b'060-75' | b'none' | 0.000832 | -19.413851 | 63.394122 | -75.906397 | 11.074000 | 0.022 | ... | 0.013835 | 0 | NaN | NaN | 0.102594 | 2 | NaN | NaN | NaN | 64 |
3 | b'2M00000032+5737103' | b'apo25m' | b'116-04' | b'none' | 0.001335 | 57.619530 | 116.065371 | -4.564768 | 10.905000 | 0.023 | ... | 0.016555 | 0 | NaN | NaN | 0.123839 | 2 | NaN | NaN | NaN | 64 |
4 | b'2M00000032+5737103' | b'apo25m' | b'N7789' | b'none' | 0.001335 | 57.619530 | 116.065371 | -4.564768 | 10.905000 | 0.023 | ... | 0.012638 | 0 | NaN | NaN | 0.107103 | 2 | NaN | NaN | NaN | 64 |
5 rows × 182 columns
#check what columns are contained in the table
list(apogee.columns)
['APOGEE_ID', 'TELESCOPE', 'FIELD', 'ALT_ID', 'RA', 'DEC', 'GLON', 'GLAT', 'J', 'J_ERR', 'H', 'H_ERR', 'K', 'K_ERR', 'AK_TARG', 'AK_TARG_METHOD', 'AK_WISE', 'SFD_EBV', 'APOGEE_TARGET1', 'APOGEE_TARGET2', 'APOGEE2_TARGET1', 'APOGEE2_TARGET2', 'APOGEE2_TARGET3', 'APOGEE2_TARGET4', 'TARGFLAGS', 'SURVEY', 'PROGRAMNAME', 'NVISITS', 'SNR', 'SNREV', 'STARFLAG', 'STARFLAGS', 'ANDFLAG', 'ANDFLAGS', 'VHELIO_AVG', 'VSCATTER', 'VERR', 'RV_TEFF', 'RV_LOGG', 'RV_FEH', 'RV_ALPHA', 'RV_CARB', 'RV_CHI2', 'RV_CCFWHM', 'RV_AUTOFWHM', 'RV_FLAG', 'N_COMPONENTS', 'MEANFIB', 'SIGFIB', 'MIN_H', 'MAX_H', 'MIN_JK', 'MAX_JK', 'GAIAEDR3_SOURCE_ID', 'GAIAEDR3_PARALLAX', 'GAIAEDR3_PARALLAX_ERROR', 'GAIAEDR3_PMRA', 'GAIAEDR3_PMRA_ERROR', 'GAIAEDR3_PMDEC', 'GAIAEDR3_PMDEC_ERROR', 'GAIAEDR3_PHOT_G_MEAN_MAG', 'GAIAEDR3_PHOT_BP_MEAN_MAG', 'GAIAEDR3_PHOT_RP_MEAN_MAG', 'GAIAEDR3_DR2_RADIAL_VELOCITY', 'GAIAEDR3_DR2_RADIAL_VELOCITY_ERROR', 'GAIAEDR3_R_MED_GEO', 'GAIAEDR3_R_LO_GEO', 'GAIAEDR3_R_HI_GEO', 'GAIAEDR3_R_MED_PHOTOGEO', 'GAIAEDR3_R_LO_PHOTOGEO', 'GAIAEDR3_R_HI_PHOTOGEO', 'ASPCAP_GRID', 'ASPCAP_CHI2', 'ASPCAPFLAG', 'ASPCAPFLAGS', 'FRAC_BADPIX', 'FRAC_LOWSNR', 'FRAC_SIGSKY', 'EXTRATARG', 'MEMBERFLAG', 'MEMBER', 'TEFF', 'TEFF_ERR', 'LOGG', 'LOGG_ERR', 'M_H', 'M_H_ERR', 'ALPHA_M', 'ALPHA_M_ERR', 'VMICRO', 'VMACRO', 'VSINI', 'TEFF_SPEC', 'LOGG_SPEC', 'C_FE', 'C_FE_SPEC', 'C_FE_ERR', 'C_FE_FLAG', 'CI_FE', 'CI_FE_SPEC', 'CI_FE_ERR', 'CI_FE_FLAG', 'N_FE', 'N_FE_SPEC', 'N_FE_ERR', 'N_FE_FLAG', 'O_FE', 'O_FE_SPEC', 'O_FE_ERR', 'O_FE_FLAG', 'NA_FE', 'NA_FE_SPEC', 'NA_FE_ERR', 'NA_FE_FLAG', 'MG_FE', 'MG_FE_SPEC', 'MG_FE_ERR', 'MG_FE_FLAG', 'AL_FE', 'AL_FE_SPEC', 'AL_FE_ERR', 'AL_FE_FLAG', 'SI_FE', 'SI_FE_SPEC', 'SI_FE_ERR', 'SI_FE_FLAG', 'P_FE', 'P_FE_SPEC', 'P_FE_ERR', 'P_FE_FLAG', 'S_FE', 'S_FE_SPEC', 'S_FE_ERR', 'S_FE_FLAG', 'K_FE', 'K_FE_SPEC', 'K_FE_ERR', 'K_FE_FLAG', 'CA_FE', 'CA_FE_SPEC', 'CA_FE_ERR', 'CA_FE_FLAG', 'TI_FE', 'TI_FE_SPEC', 'TI_FE_ERR', 'TI_FE_FLAG', 'TIII_FE', 'TIII_FE_SPEC', 'TIII_FE_ERR', 'TIII_FE_FLAG', 'V_FE', 'V_FE_SPEC', 'V_FE_ERR', 'V_FE_FLAG', 'CR_FE', 'CR_FE_SPEC', 'CR_FE_ERR', 'CR_FE_FLAG', 'MN_FE', 'MN_FE_SPEC', 'MN_FE_ERR', 'MN_FE_FLAG', 'FE_H', 'FE_H_SPEC', 'FE_H_ERR', 'FE_H_FLAG', 'CO_FE', 'CO_FE_SPEC', 'CO_FE_ERR', 'CO_FE_FLAG', 'NI_FE', 'NI_FE_SPEC', 'NI_FE_ERR', 'NI_FE_FLAG', 'CU_FE', 'CU_FE_SPEC', 'CU_FE_ERR', 'CU_FE_FLAG', 'CE_FE', 'CE_FE_SPEC', 'CE_FE_ERR', 'CE_FE_FLAG']
#plot a simple Kiel diagram (Teff - logg) colour coded by metallicity
plt.figure(figsize=(7,8))
plt.title('Number of stars in the catalogue: %d' %apogee.RA.size)
plt.scatter(apogee.TEFF, apogee.LOGG, s=1, c=apogee.FE_H, rasterized=True)
plt.gca().invert_xaxis()
plt.gca().invert_yaxis()
plt.colorbar(label=r'$\rm{[Fe/H]}$ (dex)')
plt.xlabel(r'$T_{eff}$ (K)', size=14)
plt.ylabel(r'$log\ g$ (dex)', size=14)
plt.show()
#plot the positions in Galactic coordinates
plt.figure(figsize=(16,8))
plt.scatter(wrap_at(apogee.GLON), apogee.GLAT, s=0.5, color='k', alpha=0.05, rasterized=True)
plt.gca().invert_xaxis()
plt.xlabel(r'$\ell$ (deg)', size=14)
plt.ylabel(r'$b$ (deg)', size=14)
plt.show()
#plot for densities in the [Fe/H] - [alpha/M] space
plt.figure(figsize=(7,8))
plt.hist2d(apogee.FE_H, apogee.ALPHA_M, bins=200)
plt.xlim(-2, 0.5)
plt.ylim(-0.4, 0.6)
plt.xlabel(r'$\rm{[Fe/H]}$ (dex)', size=14)
plt.ylabel(r'$\rm{[\alpha/M]}$ (dex)', size=14)
plt.show()
#WEEK 2: positions and proper motions
plt.figure(figsize=(7,8))
plt.scatter(apogee.GAIAEDR3_PMRA.values, apogee.GAIAEDR3_PMDEC.values, s=1, color='k', alpha=0.01)
plt.xlim(-50, 50)
plt.ylim(-50, 50)
plt.xlabel(r'$\mu_\alpha \cos{\delta}\ (mas\ yr^{-1})$', size=14)
plt.ylabel(r'$\mu_\delta\ (mas\ yr^{-1})$', size=14)
plt.show()
#Selecting a cluster and plot the Kiel and CMD
#True values for the globular cluster M3
# rv_m3 = -150 km/s
# pmra_m3 = -0.152 mas/yr
# pmdec_m3 = -2.670 mas/yr
f_cluster = apogee.FIELD.values == b'M3'
f_rv = apogee.VHELIO_AVG.values[f_cluster] < -100.
f_pm = np.sqrt((apogee.GAIAEDR3_PMRA.values[f_cluster] + 0.152 ) ** 2. + (apogee.GAIAEDR3_PMDEC.values[f_cluster] + 2.670) ** 2.) < 1.0 #1 mas/yr circle around the cluster
plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
plt.title('Number of cluster stars (no filters): %d' %apogee.RA[f_cluster].size)
plt.scatter(apogee.TEFF.values[f_cluster], apogee.LOGG.values[f_cluster], s=5,
c=apogee.FE_H.values[f_cluster], rasterized=True)
plt.gca().invert_xaxis()
plt.gca().invert_yaxis()
plt.colorbar(label=r'$\rm{[Fe/H]}$ (dex)')
plt.xlabel(r'$T_{eff}$ (K)', size=14)
plt.ylabel(r'$log\ g$ (dex)', size=14)
plt.subplot(1,2,2)
plt.title('Number of cluster stars (no filters): %d' %apogee.RA[f_cluster].size)
plt.scatter(apogee.GAIAEDR3_PHOT_BP_MEAN_MAG.values[f_cluster] - apogee.GAIAEDR3_PHOT_RP_MEAN_MAG.values[f_cluster],
apogee.GAIAEDR3_PHOT_G_MEAN_MAG.values[f_cluster], s=5, c=apogee.VHELIO_AVG.values[f_cluster], rasterized=True)
plt.gca().invert_yaxis()
plt.colorbar(label=r'$\rm{RV}\ (km\ s^{-1})$')
plt.xlabel(r'$G_{BP} - G_{RP}$ (mag)', size=14)
plt.ylabel(r'$G$ (mag)', size=14)
plt.tight_layout()
plt.show()
plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
plt.title('Number of cluster stars (no filter): %d' %apogee.RA[f_cluster].size)
plt.scatter(apogee.TEFF.values[f_cluster][f_rv & f_pm], apogee.LOGG.values[f_cluster][f_rv & f_pm], s=5,
c=apogee.FE_H.values[f_cluster][f_rv & f_pm], rasterized=True)
plt.gca().invert_xaxis()
plt.gca().invert_yaxis()
plt.colorbar(label=r'$\rm{[Fe/H]}$ (dex)')
plt.xlabel(r'$T_{eff}$ (K)', size=14)
plt.ylabel(r'$log\ g$ (dex)', size=14)
plt.subplot(1,2,2)
plt.title('Number of cluster stars filtered by RV and PM: %d' %apogee.RA[f_cluster][f_rv & f_pm].size)
plt.scatter(apogee.GAIAEDR3_PHOT_BP_MEAN_MAG.values[f_cluster][f_rv & f_pm] - apogee.GAIAEDR3_PHOT_RP_MEAN_MAG.values[f_cluster][f_rv & f_pm],
apogee.GAIAEDR3_PHOT_G_MEAN_MAG.values[f_cluster][f_rv & f_pm], s=5, c=apogee.VHELIO_AVG.values[f_cluster][f_rv & f_pm], rasterized=True)
plt.gca().invert_yaxis()
plt.colorbar(label=r'$\rm{RV}\ (km\ s^{-1})$')
plt.xlabel(r'$G_{BP} - G_{RP}$ (mag)', size=14)
plt.ylabel(r'$G$ (mag)', size=14)
plt.tight_layout()
plt.show()
#WEEK 3: coordinates 2.0 - Galactocentric Cartesian and Cylindrical
from astropy.coordinates import CylindricalRepresentation
from astropy.coordinates import Galactocentric
from astropy.coordinates import SkyCoord
import astropy.units as u
apogee_coords = SkyCoord(ra = apogee.RA.values * u.deg, dec = apogee.DEC.values * u.deg, pm_ra_cosdec = apogee.GAIAEDR3_PMRA.values * u.mas/u.yr,
pm_dec = apogee.GAIAEDR3_PMDEC.values * u.mas/u.yr, distance = apogee.GAIAEDR3_R_MED_PHOTOGEO.values/1000. * u.kpc,
radial_velocity = apogee.VHELIO_AVG.values * u.km/u.s)
apogee_cartesian = apogee_coords.transform_to(Galactocentric)
#check the components of this transformation
apogee_cartesian.position_angle
#As you can see, there are (x,y,z) and (v_x, v_y, v_z) components
<bound method SkyCoord.position_angle of <SkyCoord (Galactocentric: galcen_coord=<ICRS Coordinate: (ra, dec) in deg (266.4051, -28.936175)>, galcen_distance=8.122 kpc, galcen_v_sun=(12.9, 245.6, 7.78) km / s, z_sun=20.8 pc, roll=0.0 deg): (x, y, z) in kpc [( nan, nan, nan), (-9.69119473, 2.78775525, 0.69141064), (-8.02730084, 0.193413 , -0.84105353), ..., (-9.32049931, 2.48084869, -0.26526494), (-3.39932029, -6.10749585, -7.18109589), (-8.21094408, 0.32696742, -0.32480011)] (v_x, v_y, v_z) in km / s [( nan, nan, nan), ( 36.46476598, 198.93217371, 4.77874132), (-39.0901128 , 173.91075287, -33.67936839), ..., ( 56.52340578, 206.26199483, 30.38104362), ( 78.19388238, 93.52276859, -54.12611531), (-38.98291286, 208.9720472 , -6.35052755)]>>
plt.figure(figsize=(12,6))
plt.subplot(1,3,1)
plt.scatter(apogee_cartesian.x.value, apogee_cartesian.y.value, s=1, color='k', alpha=0.01)
plt.scatter(0, 0, color='r', s=10, marker='x', label='GC')
plt.scatter(-8, 0, color='C4', s=10, marker='x', label='Sun')
plt.xlabel(r'$X$ (kpc)', size=14)
plt.ylabel(r'$Y$ (kpc)', size=14)
plt.subplot(1,3,2)
plt.scatter(apogee_cartesian.x.value, apogee_cartesian.z.value, s=1, color='k', alpha=0.01)
plt.scatter(0, 0, color='r', s=10, marker='x', label='GC')
plt.scatter(-8, 0, color='C4', s=10, marker='x', label='Sun')
plt.xlabel(r'$X$ (kpc)', size=14)
plt.ylabel(r'$Z$ (kpc)', size=14)
plt.subplot(1,3,3)
plt.scatter(apogee_cartesian.y.value, apogee_cartesian.z.value, s=1, color='k', alpha=0.01)
plt.scatter(0, 0, color='r', s=10, marker='x', label='GC')
plt.scatter(0, 0, color='C4', s=10, marker='x', label='Sun')
plt.xlabel(r'$Y$ (kpc)', size=14)
plt.ylabel(r'$Z$ (kpc)', size=14)
plt.legend(loc=1)
plt.tight_layout()
plt.show()
#Now a closer zoom in into the XYZ position of the Sun
plt.figure(figsize=(12,6))
plt.subplot(1,3,1)
plt.scatter(apogee_cartesian.x.value, apogee_cartesian.y.value, s=1, color='k', alpha=0.01)
plt.scatter(0, 0, color='r', s=30, marker='x', label='GC')
plt.scatter(-8, 0, color='C4', s=30, marker='x', label='Sun')
plt.xlabel(r'$X$ (kpc)', size=14)
plt.ylabel(r'$Y$ (kpc)', size=14)
plt.xlim(-20,0)
plt.ylim(-10,10)
plt.subplot(1,3,2)
plt.scatter(apogee_cartesian.x.value, apogee_cartesian.z.value, s=1, color='k', alpha=0.01)
plt.scatter(0, 0, color='r', s=30, marker='x', label='GC')
plt.scatter(-8, 0, color='C4', s=30, marker='x', label='Sun')
plt.xlabel(r'$X$ (kpc)', size=14)
plt.ylabel(r'$Z$ (kpc)', size=14)
plt.xlim(-20,0)
plt.ylim(-5,5)
plt.subplot(1,3,3)
plt.scatter(apogee_cartesian.y.value, apogee_cartesian.z.value, s=1, color='k', alpha=0.01)
plt.scatter(0, 0, color='r', s=30, marker='x', label='GC')
plt.scatter(0, 0, color='C4', s=30, marker='x', label='Sun')
plt.xlabel(r'$Y$ (kpc)', size=14)
plt.ylabel(r'$Z$ (kpc)', size=14)
plt.xlim(-10,10)
plt.ylim(-5,5)
plt.legend(loc=1)
plt.tight_layout()
plt.show()
#Now transform the Galactocentric cartesian into cylindrical (rho, phi, z)
apogee_cylindrical = apogee_coords.transform_to(Galactocentric).cylindrical
#Check the position in the cylindrical coordinates
apogee_cylindrical
<CylindricalRepresentation (rho, phi, z) in (kpc, deg, kpc) [( nan, nan, nan), (10.08418736, 163.9516651 , 0.69141064), ( 8.02963059, 178.61975964, -0.84105353), ..., ( 9.64501517, 165.09508837, -0.26526494), ( 6.98976995, -119.09951086, -7.18109589), ( 8.21745158, 177.71963364, -0.32480011)] (has differentials w.r.t.: 's')>
#Check the velocities in the cylindrical coordinates
apogee_cylindrical.differentials
{'s': <CylindricalDifferential (d_rho, d_phi, d_z) in (kpc mas / (rad yr), mas / yr, kpc mas / (rad yr)) [( nan, nan, nan), ( 4.2086005 , -4.21012982, 1.00807322), ( 9.12732835, -4.54281433, -7.10464682), ..., ( -0.33072907, -4.67742525, 6.40886677), (-25.2603312 , 0.68934277, -11.4178784 ), ( 9.97093294, -5.32043777, -1.33964078)]>}
plt.figure(figsize=(6,6))
plt.scatter(apogee_cylindrical.rho, apogee_cylindrical.z, color='k', s=1, alpha=0.1)
plt.xlabel(r'$R_{\rm GC}$ (kpc)', size=14)
plt.ylabel(r'$Z$ (kpc)', size=14)
plt.show()
#Try with the metalicity gradient across the disk
plt.figure(figsize=(6,6))
plt.scatter(apogee_cylindrical.rho, apogee.FE_H.values, color='k', s=1, alpha=0.1)
plt.xlabel(r'$R_{\rm GC}$ (kpc)', size=14)
plt.ylabel(r'${\rm [Fe/H]}$ (dex)', size=14)
plt.show()
#It is a bit unclear what is happening, so let's plot the same but for a limited sample of
#stars of one Galactic component (selected from the[alpha/M]-[Fe/H] plot)
f_disk = (apogee.ALPHA_M.values < 0.15) & (-1. < apogee.FE_H.values) & (apogee.FE_H.values < 0.5) & (apogee.GAIAEDR3_R_MED_PHOTOGEO > 2000.0)
from scipy import stats
bin_medians, bin_edges, binnumber = stats.binned_statistic(apogee_cylindrical.rho[f_disk], apogee.FE_H.values[f_disk],
statistic='median', bins=15, range=(0, 15))
bin_std, bin_edges, binnumber = stats.binned_statistic(apogee_cylindrical.rho[f_disk], apogee.FE_H.values[f_disk],
statistic='std', bins=15, range=(0, 15))
bin_counts, bin_edges, binnumber = stats.binned_statistic(apogee_cylindrical.rho[f_disk], apogee.FE_H.values[f_disk],
statistic='count', bins=15, range=(0, 15))
bin_width = (bin_edges[1] - bin_edges[0])
bin_centers = bin_edges[1:] - bin_width/2
error_median = 1.253 * bin_std / np.sqrt(bin_counts)
plt.figure(figsize=(10,6))
plt.scatter(apogee_cylindrical.rho[f_disk], apogee.FE_H.values[f_disk], color='grey', s=1, alpha=0.1)
plt.errorbar(bin_centers, bin_medians, yerr=error_median, xerr=bin_width/2., fmt='.k')
plt.xlabel(r'$R_{\rm GC}$ (kpc)', size=14)
plt.ylabel(r'${\rm [Fe/H]}$ (dex)', size=14)
plt.xlim(0,16)
plt.ylim(-1,0.5)
plt.show()