aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c62
-rw-r--r--fs/ceph/export.c3
-rw-r--r--fs/ceph/inode.c31
-rw-r--r--fs/ceph/mds_client.c11
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/mdsmap.c2
-rw-r--r--fs/ceph/snap.c3
7 files changed, 70 insertions, 48 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 53cfe026b3ea..e54814d0c2f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -754,6 +754,7 @@ void ceph_add_cap(struct inode *inode,
cap->issue_seq = seq;
cap->mseq = mseq;
cap->cap_gen = gen;
+ wake_up_all(&ci->i_cap_wq);
}
/*
@@ -2247,7 +2248,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
- unsigned int max_sessions;
int ret, err = 0;
spin_lock(&ci->i_unsafe_lock);
@@ -2266,27 +2266,23 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
spin_unlock(&ci->i_unsafe_lock);
/*
- * The mdsc->max_sessions is unlikely to be changed
- * mostly, here we will retry it by reallocating the
- * sessions array memory to get rid of the mdsc->mutex
- * lock.
- */
-retry:
- max_sessions = mdsc->max_sessions;
-
- /*
* Trigger to flush the journal logs in all the relevant MDSes
* manually, or in the worst case we must wait at most 5 seconds
* to wait the journal logs to be flushed by the MDSes periodically.
*/
- if ((req1 || req2) && likely(max_sessions)) {
- struct ceph_mds_session **sessions = NULL;
- struct ceph_mds_session *s;
+ if (req1 || req2) {
struct ceph_mds_request *req;
+ struct ceph_mds_session **sessions;
+ struct ceph_mds_session *s;
+ unsigned int max_sessions;
int i;
- sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
+ mutex_lock(&mdsc->mutex);
+ max_sessions = mdsc->max_sessions;
+
+ sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
+ mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
@@ -2298,16 +2294,6 @@ retry:
s = req->r_session;
if (!s)
continue;
- if (unlikely(s->s_mds >= max_sessions)) {
- spin_unlock(&ci->i_unsafe_lock);
- for (i = 0; i < max_sessions; i++) {
- s = sessions[i];
- if (s)
- ceph_put_mds_session(s);
- }
- kfree(sessions);
- goto retry;
- }
if (!sessions[s->s_mds]) {
s = ceph_get_mds_session(s);
sessions[s->s_mds] = s;
@@ -2320,16 +2306,6 @@ retry:
s = req->r_session;
if (!s)
continue;
- if (unlikely(s->s_mds >= max_sessions)) {
- spin_unlock(&ci->i_unsafe_lock);
- for (i = 0; i < max_sessions; i++) {
- s = sessions[i];
- if (s)
- ceph_put_mds_session(s);
- }
- kfree(sessions);
- goto retry;
- }
if (!sessions[s->s_mds]) {
s = ceph_get_mds_session(s);
sessions[s->s_mds] = s;
@@ -2341,11 +2317,12 @@ retry:
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
- s = ci->i_auth_cap->session;
- if (!sessions[s->s_mds])
- sessions[s->s_mds] = ceph_get_mds_session(s);
+ s = ci->i_auth_cap->session;
+ if (!sessions[s->s_mds])
+ sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&mdsc->mutex);
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
@@ -2759,13 +2736,17 @@ again:
* on transition from wanted -> needed caps. This is needed
* for WRBUFFER|WR -> WR to avoid a new WR sync write from
* going before a prior buffered writeback happens.
+ *
+ * For RDCACHE|RD -> RD, there is not need to wait and we can
+ * just exclude the revoking caps and force to sync read.
*/
int not = want & ~(have & need);
int revoking = implemented & ~have;
+ int exclude = revoking & not;
dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
inode, ceph_cap_string(have), ceph_cap_string(not),
ceph_cap_string(revoking));
- if ((revoking & not) == 0) {
+ if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) {
if (!snap_rwsem_locked &&
!ci->i_head_snapc &&
(need & CEPH_CAP_FILE_WR)) {
@@ -2787,7 +2768,7 @@ again:
snap_rwsem_locked = true;
}
if ((have & want) == want)
- *got = need | want;
+ *got = need | (want & ~exclude);
else
*got = need;
ceph_take_cap_refs(ci, *got, true);
@@ -3550,6 +3531,9 @@ static void handle_cap_grant(struct inode *inode,
check_caps = 1; /* check auth cap only */
else
check_caps = 2; /* check all caps */
+ /* If there is new caps, try to wake up the waiters */
+ if (~cap->issued & newcaps)
+ wake = true;
cap->issued = newcaps;
cap->implemented |= newcaps;
} else if (cap->issued == newcaps) {
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e0fa66ac8b9f..f780e4e0d062 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -181,6 +181,7 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
{
struct inode *inode = __lookup_inode(sb, ino);
+ struct ceph_inode_info *ci = ceph_inode(inode);
int err;
if (IS_ERR(inode))
@@ -192,7 +193,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
return ERR_PTR(err);
}
/* -ESTALE if inode as been unlinked and no file is open */
- if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
+ if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) {
iput(inode);
return ERR_PTR(-ESTALE);
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 42351d7a0dd6..bad9eeb6a1a5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -362,7 +362,7 @@ static int ceph_fill_fragtree(struct inode *inode,
if (nsplits != ci->i_fragtree_nsplits) {
update = true;
} else if (nsplits) {
- i = prandom_u32() % nsplits;
+ i = prandom_u32_max(nsplits);
id = le32_to_cpu(fragtree->splits[i].frag);
if (!__ceph_find_frag(ci, id))
update = true;
@@ -2192,6 +2192,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
inode->i_ctime = attr->ia_ctime;
+ inode_inc_iversion_raw(inode);
}
release &= issued;
@@ -2356,6 +2357,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value,
goto out;
}
+ req->r_feature_needed = CEPHFS_FEATURE_OP_GETVXATTR;
req->r_path2 = kstrdup(name, GFP_NOFS);
if (!req->r_path2) {
err = -ENOMEM;
@@ -2447,6 +2449,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct super_block *sb = inode->i_sb;
struct ceph_inode_info *ci = ceph_inode(inode);
u32 valid_mask = STATX_BASIC_STATS;
int err = 0;
@@ -2476,16 +2479,34 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
}
if (ceph_snap(inode) == CEPH_NOSNAP)
- stat->dev = inode->i_sb->s_dev;
+ stat->dev = sb->s_dev;
else
stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
if (S_ISDIR(inode->i_mode)) {
- if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
- RBYTES))
+ if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) {
stat->size = ci->i_rbytes;
- else
+ } else if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ struct ceph_inode_info *pci;
+ struct ceph_snap_realm *realm;
+ struct inode *parent;
+
+ parent = ceph_lookup_inode(sb, ceph_ino(inode));
+ if (IS_ERR(parent))
+ return PTR_ERR(parent);
+
+ pci = ceph_inode(parent);
+ spin_lock(&pci->i_ceph_lock);
+ realm = pci->i_snap_realm;
+ if (realm)
+ stat->size = realm->num_snaps;
+ else
+ stat->size = 0;
+ spin_unlock(&pci->i_ceph_lock);
+ iput(parent);
+ } else {
stat->size = ci->i_files + ci->i_subdirs;
+ }
stat->blocks = 0;
stat->blksize = 65536;
/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 80f8b9ec1a31..26a0a8b9975e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2318,6 +2318,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
INIT_LIST_HEAD(&req->r_unsafe_dir_item);
INIT_LIST_HEAD(&req->r_unsafe_target_item);
req->r_fmode = -1;
+ req->r_feature_needed = -1;
kref_init(&req->r_kref);
RB_CLEAR_NODE(&req->r_node);
INIT_LIST_HEAD(&req->r_wait);
@@ -2916,6 +2917,16 @@ static void __do_request(struct ceph_mds_client *mdsc,
dout("do_request mds%d session %p state %s\n", mds, session,
ceph_session_state_name(session->s_state));
+
+ /*
+ * The old ceph will crash the MDSs when see unknown OPs
+ */
+ if (req->r_feature_needed > 0 &&
+ !test_bit(req->r_feature_needed, &session->s_features)) {
+ err = -EOPNOTSUPP;
+ goto out_session;
+ }
+
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 256e3eada6c1..0598faa50e2e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -31,8 +31,9 @@ enum ceph_feature_type {
CEPHFS_FEATURE_METRIC_COLLECT,
CEPHFS_FEATURE_ALTERNATE_NAME,
CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+ CEPHFS_FEATURE_OP_GETVXATTR,
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR,
};
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
@@ -44,6 +45,7 @@ enum ceph_feature_type {
CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_METRIC_COLLECT, \
CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \
+ CEPHFS_FEATURE_OP_GETVXATTR, \
}
/*
@@ -336,6 +338,8 @@ struct ceph_mds_request {
long long r_dir_ordered_cnt;
int r_readdir_cache_idx;
+ int r_feature_needed;
+
struct ceph_cap_reservation r_caps_reservation;
};
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 8d0a6d2c2da4..3fbabc98e1f7 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -29,7 +29,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
return -1;
/* pick */
- n = prandom_u32() % n;
+ n = prandom_u32_max(n);
for (j = 0, i = 0; i < m->possible_max_rank; i++) {
if (CEPH_MDS_IS_READY(i, ignore_laggy))
j++;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 864cdaa0d2bd..e4151852184e 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -763,7 +763,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_mds_snap_realm *ri; /* encoded */
__le64 *snaps; /* encoded */
__le64 *prior_parent_snaps; /* encoded */
- struct ceph_snap_realm *realm = NULL;
+ struct ceph_snap_realm *realm;
struct ceph_snap_realm *first_realm = NULL;
struct ceph_snap_realm *realm_to_rebuild = NULL;
int rebuild_snapcs;
@@ -774,6 +774,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
dout("%s deletion=%d\n", __func__, deletion);
more:
+ realm = NULL;
rebuild_snapcs = 0;
ceph_decode_need(&p, e, sizeof(*ri), bad);
ri = p;