Untitled diff

Created Diff never expires
113 removals
Lines
Total
Removed
Words
Total
Removed
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
348 lines
68 additions
Lines
Total
Added
Words
Total
Added
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
310 lines
#!/bin/bash
#!/bin/bash
# Copyright 2016 Google, Inc.
# Copyright 2016 Google, Inc.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.


# This init script installs a cloud-sql-proxy on each node in the cluster, and
# This init script installs a cloud-sql-proxy on each node in the cluster, and
# uses that proxy to expose TCP proxies of one or more CloudSQL instances.
# uses that proxy to expose TCP proxies of one or more CloudSQL instances.
# One of these instances is used for the clusters Hive Metastore.
# One of these instances is used for the clusters Hive Metastore.


# Do not use "set -x" to avoid printing passwords in clear in the logs
# Do not use "set -x" to avoid printing passwords in clear in the logs
set -euo pipefail
set -euo pipefail


# Whether to configure the Hive metastore to point to a Cloud SQL database.
# Whether to configure the Hive metastore to point to a Cloud SQL database.
# This is not required for Hive & Spark I/O.
# This is not required for Hive & Spark I/O.

HIVE_USER_DEFAULT='${sql_user}'
HIVE_PASSWORD_DEFAULT='${sql_password}'
SQL_USER_DEFAULT='root'
SQL_PASSWORD_DEFAULT='${sql_password}'

readonly enable_cloud_sql_metastore="$(/usr/share/google/get_metadata_value attributes/enable-cloud-sql-hive-metastore || echo 'true')"
readonly enable_cloud_sql_metastore="$(/usr/share/google/get_metadata_value attributes/enable-cloud-sql-hive-metastore || echo 'true')"


# Whether to enable the proxy on workers. This is not necessary for the
# Whether to enable the proxy on workers. This is not necessary for the
# Metastore, but is required for Hive & Spark I/O.
# Metastore, but is required for Hive & Spark I/O.
readonly enable_proxy_on_workers="$(/usr/share/google/get_metadata_value attributes/enable-cloud-sql-proxy-on-workers || echo 'true')"
readonly enable_proxy_on_workers="$(/usr/share/google/get_metadata_value attributes/enable-cloud-sql-proxy-on-workers || echo 'true')"


# Database user to use to access metastore.
# Database user to use to access metastore.
readonly db_hive_user="$(/usr/share/google/get_metadata_value attributes/db-hive-user || echo 'hive')"
readonly db_hive_user="$(/usr/share/google/get_metadata_value attributes/db-hive-user || echo $HIVE_USER_DEFAULT)"


readonly db_admin_user="$(/usr/share/google/get_metadata_value attributes/db-admin-user || echo 'root')"
readonly db_admin_user="$(/usr/share/google/get_metadata_value attributes/db-admin-user || echo $SQL_USER_DEFAULT)"


readonly kms_key_uri="$(/usr/share/google/get_metadata_value attributes/kms-key-uri)"
readonly kms_key_uri="$(/usr/share/google/get_metadata_value attributes/kms-key-uri)"


# Database admin user password used to create the metastore database and user.
# Database admin user password used to create the metastore database and user.
readonly db_admin_password_uri="$(/usr/share/google/get_metadata_value attributes/db-admin-password-uri)"
readonly db_admin_password_uri="$(/usr/share/google/get_metadata_value attributes/db-admin-password-uri)"
if [[ -n "${db_admin_password_uri}" ]]; then
if [[ -n "$${db_admin_password_uri}" ]]; then
# Decrypt password
# Decrypt password
readonly db_admin_password="$(gsutil cat $db_admin_password_uri | \
readonly db_admin_password="$(gsutil cat $db_admin_password_uri | \
gcloud kms decrypt \
gcloud kms decrypt \
--ciphertext-file - \
--ciphertext-file - \
--plaintext-file - \
--plaintext-file - \
--key $kms_key_uri)"
--key $kms_key_uri)"
else
else
readonly db_admin_password=''
readonly db_admin_password=$SQL_PASSWORD_DEFAULT
fi
fi
if [ "${db_admin_password}" == "" ]; then
if [ "$${db_admin_password}" == "" ]; then
readonly db_admin_password_parameter=""
readonly db_admin_password_parameter=""
else
else
readonly db_admin_password_parameter="-p${db_admin_password}"
readonly db_admin_password_parameter="-p$${db_admin_password}"
fi
fi


# Database password to use to access metastore.
# Database password to use to access metastore.
readonly db_hive_password_uri="$(/usr/share/google/get_metadata_value attributes/db-hive-password-uri)"
readonly db_hive_password_uri="$(/usr/share/google/get_metadata_value attributes/db-hive-password-uri)"
if [[ -n "${db_hive_password_uri}" ]]; then
if [[ -n "$${db_hive_password_uri}" ]]; then
# Decrypt password
# Decrypt password
readonly db_hive_password="$(gsutil cat $db_hive_password_uri | \
readonly db_hive_password="$(gsutil cat $db_hive_password_uri | \
gcloud kms decrypt \
gcloud kms decrypt \
--ciphertext-file - \
--ciphertext-file - \
--plaintext-file - \
--plaintext-file - \
--key $kms_key_uri)"
--key $kms_key_uri)"
else
else
readonly db_hive_password='hive-password'
readonly db_hive_password=$HIVE_PASSWORD_DEFAULT
fi
fi
if [ "${db_hive_password}" == "" ]; then
if [ "$${db_hive_password}" == "" ]; then
readonly db_hive_password_parameter=""
readonly db_hive_password_parameter=""
else
else
readonly db_hive_password_parameter="-p${db_hive_password}"
readonly db_hive_password_parameter="-p$${db_hive_password}"
fi
fi


readonly PROXY_DIR='/var/run/cloud_sql_proxy'
readonly PROXY_DIR='/var/run/cloud_sql_proxy'
readonly PROXY_BIN='/usr/local/bin/cloud_sql_proxy'
readonly PROXY_BIN='/usr/local/bin/cloud_sql_proxy'
readonly INIT_SCRIPT='/usr/lib/systemd/system/cloud-sql-proxy.service'
readonly INIT_SCRIPT='/usr/lib/systemd/system/cloud-sql-proxy.service'
readonly ADDITIONAL_INSTANCES_KEY='attributes/additional-cloud-sql-instances'
readonly ADDITIONAL_INSTANCES_KEY='attributes/additional-cloud-sql-instances'


# Dataproc master nodes information
readonly DATAPROC_MASTER=$(/usr/share/google/get_metadata_value attributes/dataproc-master)

function err() {
function err() {
echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@" >&2
echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@" >&2
return 1
return 1
}
}


# Helper to run any command with Fibonacci backoff.
# Helper to run any command with Fibonacci backoff.
# If all retries fail, returns last attempt's exit code.
# If all retries fail, returns last attempt's exit code.
# Args: "$@" is the command to run.
# Args: "$@" is the command to run.
function run_with_retries() {
function run_with_retries() {
local retry_backoff=(1 1 2 3 5 8 13 21 34 55 89 144)
local retry_backoff=(1 1 2 3 5 8 13 21 34 55 89 144)
local -a cmd=("$@")
local -a cmd=("$@")
echo "About to run '${cmd[*]}' with retries..."
echo "About to run '$${cmd[*]}' with retries..."


local update_succeeded=0
local update_succeeded=0
for ((i = 0; i < ${#retry_backoff[@]}; i++)); do
for ((i = 0; i < $${#retry_backoff[@]}; i++)); do
if "${cmd[@]}"; then
if "$${cmd[@]}"; then
update_succeeded=1
update_succeeded=1
break
break
else
else
local sleep_time=${retry_backoff[$i]}
local sleep_time=$${retry_backoff[$i]}
echo "'${cmd[*]}' attempt $(( $i + 1 )) failed! Sleeping ${sleep_time}." >&2
echo "'$${cmd[*]}' attempt $(( $i + 1 )) failed! Sleeping $${sleep_time}." >&2
sleep ${sleep_time}
sleep $${sleep_time}
fi
fi
done
done


if ! (( ${update_succeeded} )); then
if ! (( $${update_succeeded} )); then
echo "Final attempt of '${cmd[*]}'..."
echo "Final attempt of '$${cmd[*]}'..."
# Let any final error propagate all the way out to any error traps.
# Let any final error propagate all the way out to any error traps.
"${cmd[@]}"
"$${cmd[@]}"
fi
fi
}
}


function configure_proxy_flags() {
function configure_proxy_flags() {
if [[ $enable_cloud_sql_metastore = "true" ]]; then
if [[ $enable_cloud_sql_metastore = "true" ]]; then
if [[ -z "${metastore_instance}" ]]; then
if [[ -z "$${metastore_instance}" ]]; then
err 'Must specify hive-metastore-instance VM metadata'
err 'Must specify hive-metastore-instance VM metadata'
elif ! [[ "${metastore_instance}" =~ .+:.+:.+ ]]; then
elif ! [[ "$${metastore_instance}" =~ .+:.+:.+ ]]; then
err 'hive-metastore-instance must be of form project:region:instance'
err 'hive-metastore-instance must be of form project:region:instance'
elif ! [[ "${metastore_instance}" =~ =tcp:[0-9]+$ ]]; then
elif ! [[ "$${metastore_instance}" =~ =tcp:[0-9]+$ ]]; then
metastore_instance+="=tcp:${metastore_proxy_port}"
metastore_instance+="=tcp:$${metastore_proxy_port}"
else
else
metastore_proxy_port="${metastore_instance##*:}"
metastore_proxy_port="$${metastore_instance##*:}"
fi
fi
proxy_instances_flags+=" -instances=${metastore_instance}"
proxy_instances_flags+=" -instances=$${metastore_instance}"
fi
fi


if [[ -n "${additional_instances}" ]]; then
if [[ -n "$${additional_instances}" ]]; then
# Pass additional instances straight to the proxy.
# Pass additional instances straight to the proxy.
proxy_instances_flags+=" -instances_metadata=instance/${ADDITIONAL_INSTANCES_KEY}"
proxy_instances_flags+=" -instances_metadata=instance/$${ADDITIONAL_INSTANCES_KEY}"
fi
fi
}
}


function install_cloud_sql_proxy() {
function install_cloud_sql_proxy() {
# Install proxy.
# Install proxy.
wget -q https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 \
wget -q https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 \
|| err 'Unable to download cloud-sql-proxy binary'
|| err 'Unable to download cloud-sql-proxy binary'
mv cloud_sql_proxy.linux.amd64 ${PROXY_BIN}
mv cloud_sql_proxy.linux.amd64 $${PROXY_BIN}
chmod +x ${PROXY_BIN}
chmod +x $${PROXY_BIN}


mkdir -p ${PROXY_DIR}
mkdir -p $${PROXY_DIR}


# Install proxy as systemd service for reboot tolerance.
# Install proxy as systemd service for reboot tolerance.
cat << EOF > ${INIT_SCRIPT}
cat << EOF > $${INIT_SCRIPT}
[Unit]
[Unit]
Description=Google Cloud SQL Proxy
Description=Google Cloud SQL Proxy
After=local-fs.target network-online.target
After=local-fs.target network-online.target
After=google.service
After=google.service
Before=shutdown.target
Before=shutdown.target


[Service]
[Service]
Type=simple
Type=simple
ExecStart=${PROXY_BIN} \
ExecStart=$${PROXY_BIN} \
-dir=${PROXY_DIR} \
-dir=$${PROXY_DIR} \
${proxy_instances_flags}
$${proxy_instances_flags}


[Install]
[Install]
WantedBy=multi-user.target
WantedBy=multi-user.target
EOF
EOF
chmod a+rw ${INIT_SCRIPT}
chmod a+rw $${INIT_SCRIPT}
systemctl enable cloud-sql-proxy
systemctl enable cloud-sql-proxy
systemctl start cloud-sql-proxy \
systemctl start cloud-sql-proxy \
|| err 'Unable to start cloud-sql-proxy service'
|| err 'Unable to start cloud-sql-proxy service'

if [[ $enable_cloud_sql_metastore = "true" ]]; then
run_with_retries nc -zv localhost $${metastore_proxy_port}
run_with_retries nc -zv localhost ${metastore_proxy_port}
fi


echo 'Cloud SQL Proxy installation succeeded' >&2
echo 'Cloud SQL Proxy installation succeeded' >&2


if [[ $enable_cloud_sql_metastore = "true" ]]; then
if [[ $enable_cloud_sql_metastore = "true" ]]; then
# Update hive-site.xml
# Update hive-site.xml
cat << EOF > hive-template.xml
cat << EOF > hive-template.xml
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<configuration>
<property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:${metastore_proxy_port}/${metastore_db}</value>
<value>jdbc:mysql://localhost:$${metastore_proxy_port}/$${metastore_db}</value>
<description>the URL of the MySQL database</description>
<description>the URL of the MySQL database</description>
</property>
</property>
<property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<name>javax.jdo.option.ConnectionUserName</name>
<value>${db_hive_user}</value>
<value>$${db_hive_user}</value>
</property>
</property>
<property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<name>javax.jdo.option.ConnectionPassword</name>
<value>${db_hive_password}</value>
<value>$${db_hive_password}</value>
</property>
</property>
</configuration>
</configuration>
EOF
EOF


bdconfig merge_configurations \
bdconfig merge_configurations \
--configuration_file /etc/hive/conf/hive-site.xml \
--configuration_file /etc/hive/conf/hive-site.xml \
--source_configuration_file hive-template.xml \
--source_configuration_file hive-template.xml \
--clobber
--clobber
fi
fi
}
}




function configure_sql_client(){
function configure_sql_client(){
# Configure mysql client to talk to metastore
# Configure mysql client to talk to metastore
cat << EOF > /etc/mysql/conf.d/cloud-sql-proxy.cnf
cat << EOF > /etc/mysql/conf.d/cloud-sql-proxy.cnf
[client]
[client]
protocol = tcp
protocol = tcp
port = ${metastore_proxy_port}
port = $${metastore_proxy_port}
EOF
EOF


# Check if metastore is initialized.
# Check if metastore is initialized.
if ! mysql -u "${db_hive_user}" "${db_hive_password_parameter}" -e ''; then
if ! mysql -u "$${db_hive_user}" "$${db_hive_password_parameter}" -e ''; then
mysql -u "${db_admin_user}" "${db_admin_password_parameter}" -e \
mysql -u "$${db_admin_user}" "$${db_admin_password_parameter}" -e \
"CREATE USER '${db_hive_user}' IDENTIFIED BY '${db_hive_password}';"
"CREATE USER '$${db_hive_user}' IDENTIFIED BY '$${db_hive_password}';"
fi
fi
if mysql -u "${db_hive_user}" "${db_hive_password_parameter}" -e "use ${metastore_db}"; then
if mysql -u "$${db_hive_user}" "$${db_hive_password_parameter}" -e "use $${metastore_db}"; then
# Extract the warehouse URI.
# Extract the warehouse URI.
HIVE_WAREHOURSE_URI=$(mysql -u "${db_hive_user}" "${db_hive_password_parameter}" -Nse \
HIVE_WAREHOURSE_URI=$(mysql -u "$${db_hive_user}" "$${db_hive_password_parameter}" -Nse \
"SELECT DB_LOCATION_URI FROM ${metastore_db}.DBS WHERE NAME = 'default';")
"SELECT DB_LOCATION_URI FROM $${metastore_db}.DBS WHERE NAME = 'default';")
bdconfig set_property \
bdconfig set_property \
--name 'hive.metastore.warehouse.dir' \
--name 'hive.metastore.warehouse.dir' \
--value "${HIVE_WAREHOURSE_URI}" \
--value "$${HIVE_WAREHOURSE_URI}" \
--configuration_file /etc/hive/conf/hive-site.xml \
--configuration_file /etc/hive/conf/hive-site.xml \
--clobber
--clobber
else
else
# Initialize database with current warehouse URI.
# Initialize database with current warehouse URI.
mysql -u "${db_admin_user}" "${db_admin_password_parameter}" -e \
mysql -u "$${db_admin_user}" "$${db_admin_password_parameter}" -e \
"CREATE DATABASE ${metastore_db}; \
"CREATE DATABASE $${metastore_db}; \
GRANT ALL PRIVILEGES ON ${metastore_db}.* TO '${db_hive_user}';"
GRANT ALL PRIVILEGES ON $${metastore_db}.* TO '$${db_hive_user}';"
/usr/lib/hive/bin/schematool -dbType mysql -initSchema \
/usr/lib/hive/bin/schematool -dbType mysql -initSchema \
|| err 'Failed to set mysql schema.'
|| err 'Failed to set mysql schema.'
fi
fi


run_with_retries run_validation
}

function run_validation() {
if ( systemctl is-enabled --quiet hive-metastore ); then
if ( systemctl is-enabled --quiet hive-metastore ); then
# Start metastore back up.
# Start metastore back up.
systemctl restart hive-metastore \
systemctl start hive-metastore \
|| err 'Unable to start hive-metastore service'
|| err 'Unable to start hive-metastore service'
else
else
echo "Service hive-metastore is not loaded"
echo "Service hive-metastore is not loaded"
fi
fi


# Check that metastore schema is compatible.
/usr/lib/hive/bin/schematool -dbType mysql -info || \
err 'Run /usr/lib/hive/bin/schematool -dbType mysql -upgradeSchemaFrom <schema-version> to upgrade the schema. Note that this may break Hive metastores that depend on the old schema'

# Validate it's functioning.
# Validate it's functioning.
if ! timeout 60s beeline -u jdbc:hive2://localhost:10000 -e 'SHOW TABLES;' >& /dev/null; then
if ! hive -e 'SHOW TABLES;' >& /dev/null; then
err 'Failed to bring up Cloud SQL Metastore'
err 'Failed to bring up Cloud SQL Metastore'
else
echo 'Cloud SQL Hive Metastore initialization succeeded' >&2
fi
fi
echo 'Cloud SQL Hive Metastore initialization succeeded' >&2


}
}



function configure_hive_warehouse_dir(){
# Wait for master 0 to create the metastore db if necessary.
run_with_retries run_validation

HIVE_WAREHOURSE_URI=$(beeline -u jdbc:hive2://localhost:10000 \
-e "describe database default;" \
| sed '4q;d' | cut -d "|" -f4 | tr -d '[:space:]')

echo "Hive warehouse uri: $HIVE_WAREHOURSE_URI"

bdconfig set_property \
--name 'hive.metastore.warehouse.dir' \
--value "${HIVE_WAREHOURSE_URI}" \
--configuration_file /etc/hive/conf/hive-site.xml \
--clobber
echo "Updated hive warehouse dir"
}

function main() {
function main() {


local role
local role
role="$(/usr/share/google/get_metadata_value attributes/dataproc-role)"
role="$(/usr/share/google/get_metadata_value attributes/dataproc-role)"


local metastore_instance
local metastore_instance
metastore_instance="$(/usr/share/google/get_metadata_value attributes/hive-metastore-instance || true)"
metastore_instance="$(/usr/share/google/get_metadata_value attributes/hive-metastore-instance || true)"


local additional_instances
local additional_instances
additional_instances="$(/usr/share/google/get_metadata_value ${ADDITIONAL_INSTANCES_KEY} || true)"
additional_instances="$(/usr/share/google/get_metadata_value $${ADDITIONAL_INSTANCES_KEY} || true)"


local metastore_db
local metastore_db
metastore_db="$(/usr/share/google/get_metadata_value attributes/hive-metastore-db || true)"
metastore_db="$(/usr/share/google/get_metadata_value attributes/hive-metastore-db || true)"


# Name of CloudSQL instance to use for the metastore. Must already exist.
# Name of CloudSQL instance to use for the metastore. Must already exist.
# Uncomment to hard code an instance. Metadata will still take precedence.
# Uncomment to hard code an instance. Metadata will still take precedence.
metastore_instance_default= # my-project:my-region:my-instance
metastore_instance_default= # my-project:my-region:my-instance
metastore_instance="${metastore_instance:-${metastore_instance_default}}"
metastore_instance="$${metastore_instance:-$${metastore_instance_default}}"


# Name of MySQL database to use for the metastore. Will be created if
# Name of MySQL database to use for the metastore. Will be created if
# it doesn't exist.
# it doesn't exist.


metastore_db="${metastore_db:-hive_metastore}"
metastore_db="$${metastore_db:-hive_metastore}"


local metastore_proxy_port
local metastore_proxy_port
metastore_proxy_port="$(/usr/share/google/get_metadata_value attributes/metastore-proxy-port || echo '3306')"
metastore_proxy_port="$(/usr/share/google/get_metadata_value attributes/metastore-proxy-port || echo '3306')"


# Validation
# Validation
if [[ $enable_cloud_sql_metastore != "true" ]] && [[ -z "${additional_instances}" ]]; then
if [[ $enable_cloud_sql_metastore != "true" ]] && [[ -z "$${additional_instances}" ]]; then
err 'No Cloud SQL instances to proxy'
err 'No Cloud SQL instances to proxy'
fi
fi


local proxy_instances_flags
local proxy_instances_flags
proxy_instances_flags=''
proxy_instances_flags=''
configure_proxy_flags
configure_proxy_flags


if [[ "${role}" == 'Master' ]]; then
if [[ "$${role}" == 'Master' ]]; then
# Disable Hive Metastore and MySql Server.
# Disable Hive Metastore and MySql Server.
if [[ $enable_cloud_sql_metastore = "true" ]]; then
if [[ $enable_cloud_sql_metastore = "true" ]]; then
if ( systemctl is-enabled --quiet hive-metastore ); then
if ( systemctl is-enabled --quiet hive-metastore ); then
# Stop hive-metastore if it is enabled
# Stop hive-metastore if it is enabled
systemctl stop hive-metastore
systemctl stop hive-metastore
else
else
echo "Service hive-metastore is not enabled"
echo "Service hive-metastore is not enabled"
fi
fi
if ( systemctl is-enabled --quiet mysql ); then
systemctl stop mysql
systemctl stop mysql
systemctl disable mysql
systemctl disable mysql
else
echo "Service mysql is not enabled"
fi
fi
fi
install_cloud_sql_proxy
install_cloud_sql_proxy
if [[ $enable_cloud_sql_metastore = "true" ]]; then
if [[ $enable_cloud_sql_metastore = "true" ]]; then
if [[ "${HOSTNAME}" == "${DATAPROC_MASTER}" ]]; then
configure_sql_client
# Initialize metastore db instance and set hive.metastore.warehouse.dir
# on master 0.
configure_sql_client
else
# Set hive.metastore.warehouse.dir only on other masters.
configure_hive_warehouse_dir
fi
fi
fi
else
else
# This part runs on workers.
# This part runs on workers.
# Run installation on workers when enable_proxy_on_workers is set.
# Run installation on workers when enable_proxy_on_workers is set.
if [[ $enable_proxy_on_workers = "true" ]]; then
if [[ $enable_proxy_on_workers = "true" ]]; then
install_cloud_sql_proxy
install_cloud_sql_proxy
fi
fi
fi
fi


}
}


main
main