[PATCH] A test for "Can't get entries" error

classic Classic list List threaded Threaded
19 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] A test for "Can't get entries" error

Dmitry Pavlenko-2
Hello Subversion community!
I've run into an error: when performing 2 specially constructed updates one after another
within the same session, SVN fails with

  $ ./ra-test 15
  svn_tests: E160016: Can't get entries of non-directory
  XFAIL: ra-test 15: check that there's no "Can't get entries" error

error. I believe these updates constructed that way are valid, so the problem is
somewhere in FSFS code. It's also interesting that if these updates are run
separately (e.g. by adding "if (FALSE)" to one or another), they succeed.

Originally I've discovered this when communicating with 'svnserve' but the problem
is also reproducible with pure FSFS with the latest trunk.

[[[
Add a reproducing test for "Can't get entries of non-directory" error.

* subversion/tests/libsvn_ra/ra-test.c
  (cant_get_entries_of_non_directory): Expect no error. Currently
   it fails with "Can't get entries of non-directory" error.
]]]
[[[
Index: subversion/tests/libsvn_ra/ra-test.c
===================================================================
--- subversion/tests/libsvn_ra/ra-test.c (revision 1846889)
+++ subversion/tests/libsvn_ra/ra-test.c (working copy)
@@ -1784,7 +1784,113 @@ commit_locked_file(const svn_test_opts_t *opts, ap
   return SVN_NO_ERROR;
 }
 
+static svn_error_t *
+cant_get_entries_of_non_directory(const svn_test_opts_t *opts, apr_pool_t *pool)
+{
+  svn_ra_session_t *session;
+  const svn_delta_editor_t *editor;
+  void *edit_baton;
+  const svn_ra_reporter3_t *reporter;
+  void *report_baton;
 
+  SVN_ERR(make_and_open_repos(&session,
+                              "cant_get_entries_of_non_directory", opts,
+                              pool));
+  
+  {
+    SVN_ERR(svn_ra_get_commit_editor3(session, &editor, &edit_baton,
+                                      apr_hash_make(pool), NULL,
+                                      NULL, NULL, FALSE, pool));
+    void *root_baton;
+    void *dir_baton;
+    void *file_baton;
+
+    SVN_ERR(editor->open_root(edit_baton, 0, pool, &root_baton));
+    SVN_ERR(editor->add_directory("A", root_baton, NULL, SVN_INVALID_REVNUM,
+                                  pool, &dir_baton));
+    SVN_ERR(editor->add_file("A/mu", dir_baton, NULL, SVN_INVALID_REVNUM,
+                             pool, &file_baton));
+    SVN_ERR(editor->close_file(file_baton, NULL, pool));
+    SVN_ERR(editor->close_directory(dir_baton, pool));
+    SVN_ERR(editor->close_directory(root_baton, pool));
+    SVN_ERR(editor->close_edit(edit_baton, pool));
+  }
+  {
+    void *root_baton;
+    void *dir_baton;
+    const char* repos_root_url;
+    const char* A_url;
+    
+    SVN_ERR(svn_ra_get_repos_root2(session, &repos_root_url, pool));
+    A_url = svn_path_url_add_component2(repos_root_url, "A", pool);
+    
+
+    SVN_ERR(editor->open_root(edit_baton, 1, pool, &root_baton));
+    SVN_ERR(editor->add_directory("B", root_baton, A_url, 1,
+                                  pool, &dir_baton));
+    SVN_ERR(editor->close_directory(dir_baton, pool));
+    SVN_ERR(editor->close_directory(root_baton, pool));
+    SVN_ERR(editor->close_edit(edit_baton, pool));
+  }
+  {
+    void *root_baton;
+
+    SVN_ERR(editor->open_root(edit_baton, 2, pool, &root_baton));
+    SVN_ERR(editor->delete_entry("B/mu", 2, root_baton, pool));
+    SVN_ERR(editor->close_directory(root_baton, pool));
+    SVN_ERR(editor->close_edit(edit_baton, pool));
+  }
+  {
+    void *root_baton;
+    void *dir_baton;
+    void *subdir_baton;
+    void *file_baton;
+
+    SVN_ERR(editor->open_root(edit_baton, 3, pool, &root_baton));
+    SVN_ERR(editor->open_directory("B", root_baton, 3, pool, &dir_baton));
+    SVN_ERR(editor->add_directory("B/mu", root_baton, NULL, SVN_INVALID_REVNUM,
+                                  pool, &subdir_baton));
+    SVN_ERR(editor->add_file("B/mu/iota", subdir_baton, NULL, SVN_INVALID_REVNUM,
+                             pool, &file_baton));
+    SVN_ERR(editor->close_file(file_baton, NULL, pool));
+    SVN_ERR(editor->close_directory(subdir_baton, pool));
+    SVN_ERR(editor->close_directory(dir_baton, pool));
+    SVN_ERR(editor->close_directory(root_baton, pool));
+    SVN_ERR(editor->close_edit(edit_baton, pool));
+  }
+  /* The following updates fail when executed in this order
+     one after another within the same session.
+    
+     When commenting out one of the blocks the test passes
+     */
+  {
+    SVN_ERR(svn_ra_do_update3(session, &reporter, &report_baton,
+                            3, "", svn_depth_infinity, TRUE, FALSE,
+                            svn_delta_default_editor(pool), NULL,
+                            pool, pool));
+    SVN_ERR(reporter->set_path(report_baton, "", 3, svn_depth_infinity, FALSE,
+                               NULL, pool));
+    SVN_ERR(reporter->set_path(report_baton, "B", 2, svn_depth_infinity, FALSE,
+                               NULL, pool));
+    SVN_ERR(reporter->finish_report(report_baton, pool));
+    
+    
+  }
+  {
+    SVN_ERR(svn_ra_do_update3(session, &reporter, &report_baton,
+                            4, "", svn_depth_infinity, TRUE, FALSE,
+                            svn_delta_default_editor(pool), NULL,
+                            pool, pool));
+    SVN_ERR(reporter->set_path(report_baton, "", 4, svn_depth_infinity, FALSE,
+                               NULL, pool));
+    SVN_ERR(reporter->set_path(report_baton, "B", 3, svn_depth_infinity, FALSE,
+                               NULL, pool));
+    SVN_ERR(reporter->finish_report(report_baton, pool));
+  }
+  return SVN_NO_ERROR;
+}
+
+
 /* The test table.  */
 
 static int max_threads = 4;
@@ -1820,6 +1926,8 @@ static struct svn_test_descriptor_t test_funcs[] =
                        "check how last change applies to empty commit"),
     SVN_TEST_OPTS_PASS(commit_locked_file,
                        "check commit editor for a locked file"),
+    SVN_TEST_OPTS_XFAIL(cant_get_entries_of_non_directory,
+                       "check that there's no \"Can't get entries\" error"),
     SVN_TEST_NULL
   };
]]]
--
Dmitry Pavlenko,
TMate Software,
http://subgit.com/ - git-svn bridge

cant_get_entries_test.patch (5K) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-5
Dmitry Pavlenko wrote on Mon, Nov 19, 2018 at 17:05:16 +0300:
> Hello Subversion community!
> I've run into an error: when performing 2 specially constructed updates one after another
> within the same session, SVN fails with
>
>   $ ./ra-test 15
>   svn_tests: E160016: Can't get entries of non-directory
>   XFAIL: ra-test 15: check that there's no "Can't get entries" error
>

That error code is SVN_ERR_FS_NOT_DIRECTORY.

> error. I believe these updates constructed that way are valid, so the problem is
> somewhere in FSFS code. It's also interesting that if these updates are run
> separately (e.g. by adding "if (FALSE)" to one or another), they succeed.
>

Could you please clarify whether the bug reproduces under other backends (FSX and BDB)?

> +++ subversion/tests/libsvn_ra/ra-test.c (working copy)
> @@ -1784,7 +1784,113 @@ commit_locked_file(const svn_test_opts_t *opts, ap
> +cant_get_entries_of_non_directory(const svn_test_opts_t *opts, apr_pool_t *pool)
> +{
> +  svn_ra_session_t *session;
> +  const svn_delta_editor_t *editor;
> +  void *edit_baton;
> +  const svn_ra_reporter3_t *reporter;
> +  void *report_baton;
>  
> +  SVN_ERR(make_and_open_repos(&session,
> +                              "cant_get_entries_of_non_directory", opts,
> +                              pool));
> +  
> +  {
> +    SVN_ERR(svn_ra_get_commit_editor3(session, &editor, &edit_baton,
> +                                      apr_hash_make(pool), NULL,
> +                                      NULL, NULL, FALSE, pool));
> +
> +    SVN_ERR(editor->open_root(edit_baton, 0, pool, &root_baton));
> +  }

You make all commits using the same EDITOR.  Is that allowed?  Should
you make the 'editor' variable block-scoped and call svn_ra_get_commit_editor3()
anew in each block?

> +  {
> +    SVN_ERR(editor->open_root(edit_baton, 1, pool, &root_baton));

> +  }

> +  {
> +    SVN_ERR(svn_ra_do_update3(session, &reporter, &report_baton,
> +                            3, "", svn_depth_infinity, TRUE, FALSE,
> +                            svn_delta_default_editor(pool), NULL,
> +                            pool, pool));
> +    SVN_ERR(reporter->set_path(report_baton, "", 3, svn_depth_infinity, FALSE,
> +                               NULL, pool));
> +    SVN_ERR(reporter->set_path(report_baton, "B", 2, svn_depth_infinity, FALSE,
> +                               NULL, pool));
> +    SVN_ERR(reporter->finish_report(report_baton, pool));
> +    
> +    
> +  }
> +  {
> +    SVN_ERR(svn_ra_do_update3(session, &reporter, &report_baton,
> +                            4, "", svn_depth_infinity, TRUE, FALSE,
> +                            svn_delta_default_editor(pool), NULL,
> +                            pool, pool));
> +    SVN_ERR(reporter->set_path(report_baton, "", 4, svn_depth_infinity, FALSE,
> +                               NULL, pool));
> +    SVN_ERR(reporter->set_path(report_baton, "B", 3, svn_depth_infinity, FALSE,
> +                               NULL, pool));
> +    SVN_ERR(reporter->finish_report(report_baton, pool));
> +  }

I agree that this should work, and therefore that it's a bug.

Suggestions for further debugging:

- Further minimise the test.  Try to have fewer add_file calls, fewer
  set_path calls, etc..  (I realise you must have minimised it a lot
  already, from whatever the original instance was, but the more the
  better.)

- Try doing the two updates in the opposite order (exchange the order of
  the two blocks).

- See if the bug happens under FSX/BDB.  That would tell us where to
  look further (in libsvn_fs_fs, or in the reporter logic in
  libsvn_repos).

> +  return SVN_NO_ERROR;
> +}

Style nits:

- Per-block comments would be helpful.  They don't need to be detailed,
  but something like /* r1: Create 'A' and 'A/mu' */ would help skim the
  function quickly.

- The test name isn't very descriptive.  I think it would be better to
  name the test after what it expects to work: doing two updates in a
  single session after a file replacement by directory.

- One line exceeds 80 columns.

Cheers,

Daniel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Branko Čibej
On 20.11.2018 08:29, Daniel Shahaf wrote:

> Dmitry Pavlenko wrote on Mon, Nov 19, 2018 at 17:05:16 +0300:
>> Hello Subversion community!
>> I've run into an error: when performing 2 specially constructed updates one after another
>> within the same session, SVN fails with
>>
>>   $ ./ra-test 15
>>   svn_tests: E160016: Can't get entries of non-directory
>>   XFAIL: ra-test 15: check that there's no "Can't get entries" error
>>
> That error code is SVN_ERR_FS_NOT_DIRECTORY.
>
>> error. I believe these updates constructed that way are valid, so the problem is
>> somewhere in FSFS code. It's also interesting that if these updates are run
>> separately (e.g. by adding "if (FALSE)" to one or another), they succeed.
>>
> Could you please clarify whether the bug reproduces under other backends (FSX and BDB)?
>
>> +++ subversion/tests/libsvn_ra/ra-test.c (working copy)
>> @@ -1784,7 +1784,113 @@ commit_locked_file(const svn_test_opts_t *opts, ap
>> +cant_get_entries_of_non_directory(const svn_test_opts_t *opts, apr_pool_t *pool)
>> +{
>> +  svn_ra_session_t *session;
>> +  const svn_delta_editor_t *editor;
>> +  void *edit_baton;
>> +  const svn_ra_reporter3_t *reporter;
>> +  void *report_baton;
>>  
>> +  SVN_ERR(make_and_open_repos(&session,
>> +                              "cant_get_entries_of_non_directory", opts,
>> +                              pool));
>> +  
>> +  {
>> +    SVN_ERR(svn_ra_get_commit_editor3(session, &editor, &edit_baton,
>> +                                      apr_hash_make(pool), NULL,
>> +                                      NULL, NULL, FALSE, pool));
>> +
>> +    SVN_ERR(editor->open_root(edit_baton, 0, pool, &root_baton));
>> +  }
> You make all commits using the same EDITOR.  Is that allowed?  Should
> you make the 'editor' variable block-scoped and call svn_ra_get_commit_editor3()
> anew in each block?

As far as I know, using the editor after close_edit() or abort_edit()
have been called is a bad idea. This isn't explicitly spelled out in our
API docs, but none of our code or tests tries to reuse the editor. IIUC,
the only editor function you can call after close_edit() is abort_edit()
— which is a no-op if close_edit() succeeds.

The Ev2 documentation is a bit more explicit about that.

-- Brane

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Julian Foad-5
In reply to this post by Daniel Shahaf-5
> Dmitry Pavlenko wrote:
>> Hello Subversion community!

Hello Dmitry! Thanks for finding this. I confirm it.

>> I've run into an error: when performing 2 specially constructed updates one after another
>> within the same session, SVN fails with
>>
>>   $ ./ra-test 15
>>   svn_tests: E160016: Can't get entries of non-directory
>>   XFAIL: ra-test 15: check that there's no "Can't get entries" error
>
>> error. I believe these updates constructed that way are valid, so the problem is
>> somewhere in FSFS code.

I agree.

Daniel Shahaf wrote:
> Could you please clarify whether the bug reproduces under other backends
> (FSX and BDB)?

I found that the test passes under FSX and BDB; it only fails under FSFS.

I found that the RA method (local, svn, serf) makes no difference.

> You make all commits using the same EDITOR.  Is that allowed?

I found that it doesn't affect the outcome, in this case.

(Generally, as Brane said, it's undocumented and not a good idea. The attached version of the patch, 'cant_get_entries_test-j1.patch', uses a separate editor for each edit.)


--
- Julian

cant_get_entries_test-j1.patch (7K) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-2
Julian Foad wrote on Tue, 20 Nov 2018 08:38 +0000:
> Daniel Shahaf wrote:
> > Could you please clarify whether the bug reproduces under other backends
> > (FSX and BDB)?
>
> I found that the test passes under FSX and BDB; it only fails under FSFS.

The test XPASSes with SVN_X_DOES_NOT_MARK_THE_SPOT=1 (see notes/knobs),
so it's something to do with the caches.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Branko Čibej
In reply to this post by Julian Foad-5
On 20.11.2018 09:38, Julian Foad wrote:

>> Dmitry Pavlenko wrote:
>>> Hello Subversion community!
> Hello Dmitry! Thanks for finding this. I confirm it.
>
>>> I've run into an error: when performing 2 specially constructed updates one after another
>>> within the same session, SVN fails with
>>>
>>>   $ ./ra-test 15
>>>   svn_tests: E160016: Can't get entries of non-directory
>>>   XFAIL: ra-test 15: check that there's no "Can't get entries" error
>>> error. I believe these updates constructed that way are valid, so the problem is
>>> somewhere in FSFS code.
> I agree.
>
> Daniel Shahaf wrote:
>> Could you please clarify whether the bug reproduces under other backends
>> (FSX and BDB)?
> I found that the test passes under FSX and BDB; it only fails under FSFS.
>
> I found that the RA method (local, svn, serf) makes no difference.
>
>> You make all commits using the same EDITOR.  Is that allowed?
> I found that it doesn't affect the outcome, in this case.
>
> (Generally, as Brane said, it's undocumented and not a good idea. The attached version of the patch, 'cant_get_entries_test-j1.patch', uses a separate editor for each edit.)


So ... definitely a FSFS bug then. Good catch. Julian, I'd say just
commit this test? But the XFAIL needs a predicate if it passes with FSX
and BDB.

-- Brane

Reply | Threaded
Open this post in threaded view
|

[PATCH] Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-2
In reply to this post by Daniel Shahaf-2
Daniel Shahaf wrote on Tue, Nov 20, 2018 at 09:28:19 +0000:
> Julian Foad wrote on Tue, 20 Nov 2018 08:38 +0000:
> > Daniel Shahaf wrote:
> > > Could you please clarify whether the bug reproduces under other backends
> > > (FSX and BDB)?
> >
> > I found that the test passes under FSX and BDB; it only fails under FSFS.
>
> The test XPASSes with SVN_X_DOES_NOT_MARK_THE_SPOT=1 (see notes/knobs),
> so it's something to do with the caches.

So, looking at subversion/libsvn_fs_fs/tree.c:

  1076          /* If we found a directory entry, follow it.  First, we
  1077             check our node cache, and, failing that, we hit the DAG
  1078             layer.  Don't bother to contact the cache for the last
  1079             element if we already know the lookup to fail for the
  1080             complete path. */
  1081          if (next || !(flags & open_path_uncached))
  1082            SVN_ERR(dag_node_cache_get(&cached_node, root, path_so_far->data,
  1083                                       pool));
  1084          if (cached_node)
  1085            child = cached_node;
  1086          else
  1087            SVN_ERR(svn_fs_fs__dag_open(&child, here, entry, pool, iterpool));
  1088
  1089          /* "file not found" requires special handling.  */
  1090          if (child == NULL)
  1091            {
  ⋮
  1103              else if (flags & open_path_allow_null)
  1104                {
  1105                  parent_path = NULL;
  1106                  break;
  1107                }
  ⋮
  1114            }

This is what happens:

[[[
(lldb) breakpoint set -f tree.c -l 1087 -c 'root->rev == 2 && !(int)strcmp(path, "/B/mu/iota")'
(lldb) r
Process 17504 stopped
* thread #1, name = 'ra-test', stop reason = breakpoint 1.1
    frame #0: 0x00007ffff5b8be7a libsvn_fs_fs-1.so.0`open_path(parent_path_p=0x00007fffffffcb18, root=0x00007ffff7f6c9d8, path="/B/mu/iota", flags=12, is_txn_path=0, pool=0x00007ffff7f6c028) at tree.c:1087
   1084           if (cached_node)
   1085             child = cached_node;
   1086           else
-> 1087             SVN_ERR(svn_fs_fs__dag_open(&child, here, entry, pool, iterpool));
   1088
   1089           /* "file not found" requires special handling.  */
   1090           if (child == NULL)
(lldb) p root->is_txn_root
(svn_boolean_t) $2 = 0
(lldb) p root->rev
(svn_revnum_t) $3 = 2
(lldb) p stringify_node(here, pool)
(const char *) $0 = 0x00007ffff7f6cc08 "2.0.r1/0"
(lldb) pla shell svnlook tree --show-ids --full-paths cant_get_entries_of_non_directory -r2 | fgrep 2.0.r1/0
A/mu <2.0.r1/0>
B/mu <2.0.r1/0>
(lldb) p entry
(char *) $1 = 0x00007ffff7f6cbd8 "iota"
(lldb) n
Process 2969 stopped
* thread #1, name = 'ra-test', stop reason = step over
    frame #0: 0x00007ffff5b8c2b2 libsvn_fs_fs-1.so.0`open_path(parent_path_p=0x00007fffffffcb18, root=0x00007ffff7f6c9d8, path="/B/mu/iota", flags=12, is_txn_path=0, pool=0x00007ffff7f6c028) at tree.c:1176
   1173   svn_pool_destroy(iterpool);
   1174   *parent_path_p = parent_path;
   1175   return SVN_NO_ERROR;
-> 1176 }
   1177
   1178
   1179 /* Make the node referred to by PARENT_PATH mutable, if it isn't
(lldb)
]]]

In words: svn_fs_fs__dag_open() is asked to find the child "iota" of
r2:/B/mu, and instead of setting 'child' to NULL as the code expects, it
returns an error which percolates all the way to the client.

The reason SVN_X_DOES_NOT_MARK_THE_SPOT fixes it is that it bypasses the
optimization earlier in the function.  That optimization causes the the
very first iteration of the loop is to process "/B/mu".  With caches
disabled, the first iteration of the loop processes "/" and the second
iteration processes "/B" and exits early, here:

  1144      /* The path isn't finished yet; we'd better be in a directory.  */
  1145      if (svn_fs_fs__dag_node_kind(child) != svn_node_dir)
  1146        {
  1147          const char *msg;
  1148
  1149          /* Since this is not a directory and we are looking for some
  1150             sub-path, that sub-path will not exist.  That will be o.k.,
  1151             if we are just here to check for the path's existence. */
  1152          if (flags & open_path_allow_null)
  1153            {
  1154              parent_path = NULL;
  1155              break;
  1156            }

So, we just need to make the svn_fs_fs__dag_open() call set child=NULL
so it can fall back to the existing logic for handling FLAGS:

[[[
Index: subversion/libsvn_fs_fs/tree.c
===================================================================
--- subversion/libsvn_fs_fs/tree.c (revision 1845259)
+++ subversion/libsvn_fs_fs/tree.c (working copy)
@@ -1083,8 +1083,10 @@
                                        pool));
           if (cached_node)
             child = cached_node;
-          else
-            SVN_ERR(svn_fs_fs__dag_open(&child, here, entry, pool, iterpool));
+          else if (svn_fs_fs__dag_node_kind(here) == svn_node_dir)
+            SVN_ERR(svn_fs_fs__dag_open(&child, here, entry, pool, iterpool));
+          else
+            child = NULL;
 
           /* "file not found" requires special handling.  */
           if (child == NULL)
]]]

Makes sense?

Cheers,

Daniel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Re: [PATCH] A test for "Can't get entries" error

Julian Foad-5
Daniel Shahaf wrote:
> Daniel Shahaf wrote on Tue, Nov 20, 2018 at 09:28:19 +0000:
> > The test XPASSes with SVN_X_DOES_NOT_MARK_THE_SPOT=1 (see notes/knobs),
> > so it's something to do with the caches.
>
> So, looking at subversion/libsvn_fs_fs/tree.c:
[...]

> In words: svn_fs_fs__dag_open() is asked to find the child "iota" of
> r2:/B/mu, and instead of setting 'child' to NULL as the code expects, it
> returns an error which percolates all the way to the client.
>
> The reason SVN_X_DOES_NOT_MARK_THE_SPOT fixes it is that it bypasses the
> optimization earlier in the function.  That optimization causes the the
> very first iteration of the loop is to process "/B/mu".  With caches
> disabled, the first iteration of the loop processes "/" and the second
> iteration processes "/B" and exits early, here:
>
>   1144      /* The path isn't finished yet; we'd better be in a directory.  */
>   1145      if (svn_fs_fs__dag_node_kind(child) != svn_node_dir)
>   1146        {
>   1147          const char *msg;
>   1148
>   1149          /* Since this is not a directory and we are looking for some
>   1150             sub-path, that sub-path will not exist.  That will be o.k.,
>   1151             if we are just here to check for the path's existence. */
>   1152          if (flags & open_path_allow_null)
>   1153            {
>   1154              parent_path = NULL;
>   1155              break;
>   1156            }
>
> So, we just need to make the svn_fs_fs__dag_open() call set child=NULL
> so it can fall back to the existing logic for handling FLAGS:
>
> [[[
> Index: subversion/libsvn_fs_fs/tree.c
> ===================================================================
> --- subversion/libsvn_fs_fs/tree.c (revision 1845259)
> +++ subversion/libsvn_fs_fs/tree.c (working copy)
> @@ -1083,8 +1083,10 @@
>                                         pool));
>            if (cached_node)
>              child = cached_node;
> -          else
> -            SVN_ERR(svn_fs_fs__dag_open(&child, here, entry, pool, iterpool));
> +          else if (svn_fs_fs__dag_node_kind(here) == svn_node_dir)
> +            SVN_ERR(svn_fs_fs__dag_open(&child, here, entry, pool, iterpool));
> +          else
> +            child = NULL;
>  
>            /* "file not found" requires special handling.  */
>            if (child == NULL)
> ]]]
>
> Makes sense?

The top-of-loop comment says:
"/* Whenever we are at the top of this loop:
     - HERE is our current directory, ..."

As HERE is apparently NOT a directory in this case, I wonder if the comment simply should say something like "current path (usually a directory)", or whether anything else is amiss too.

In reviewing the code I was unable to keep track of all the nuances of what happens (and should happen) in all the edge cases. Especially when 'flags & open_path_allow_null' is true and the requested path is a child of a non-directory like the "/B/mu/iota" in this case: that combination doesn't seem to be well documented, which makes me wonder what the callers expect it to do.

--
- Julian
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Re: [PATCH] A test for "Can't get entries" error

Julian Foad-5
FWIW: I also looked around in FSX to see if there was any comparable code, in case there was some correct version of this pattern or some other clue.

The comparable area of code is svn_fs_x__get_dag_path().

In its loop, it seems the 'here' variable is always supposed to point to a directory; the 'dag_step' call returns an error if not.

- Julian
Reply | Threaded
Open this post in threaded view
|

SVN-4791 FSFS Can't get entries of non-directory

Julian Foad-5
(Subject line changed.)

I have filed this issue as https://issues.apache.org/jira/browse/SVN-4791 "FSFS Can't get entries of non-directory".

I have run the test on older versions. On the 1.8.x branch the test passes; from 1.9.0 onwards it fails.

--
- Julian
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-2
In reply to this post by Julian Foad-5
Julian Foad wrote on Wed, 21 Nov 2018 16:00 +0000:
> The top-of-loop comment says:
> "/* Whenever we are at the top of this loop:
>      - HERE is our current directory, ..."
>
> As HERE is apparently NOT a directory in this case, I wonder if the
> comment simply should say something like "current path (usually a
> directory)", or whether anything else is amiss too.
>

The comment is somewhat out of date, as it refers to a local variable ID
that doesn't exist.  I share your concern, however: I wondered if after
taking the 'shortcut' the loop was entered with some invariant not
holding and if patching the svn_fs_fs__dag_open() call was simply
covering up that underlying issue; however, in the end I think the
issue is real.

It _does_ look odd that open_path_allow_null is checked in two places,
though.  I suppose the second check could be removed and deferred to the
next iteration.  At that point we might also be able to find a way to
reproduce the original error even in a codepath that doesn't take the
'shortcut', in order to increase our confidence in the patch.

While we're at this function, does anyone understand why directory[1] is
accessed without checking whether directory[0] is not NUL?  There is
a comment there, but it doesn't enlighten me.  (However, I haven't run
'blame' on that comment yet.)  Even if it's correct, is there any reason
not to add an SVN_ERR_ASSERT(directory[0]) there?

> In reviewing the code I was unable to keep track of all the nuances of
> what happens (and should happen) in all the edge cases. Especially when
> 'flags & open_path_allow_null' is true and the requested path is a child
> of a non-directory like the "/B/mu/iota" in this case: that combination
> doesn't seem to be well documented, which makes me wonder what the
> callers expect it to do.

Have you read the docstring of the open_path_allow_null enumerator?

Cheers,

Daniel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-2
Daniel Shahaf wrote on Thu, 22 Nov 2018 00:51 +0000:
> While we're at this function, does anyone understand why directory[1] is
> accessed without checking whether directory[0] is not NUL?  There is
> a comment there, but it doesn't enlighten me.  (However, I haven't run
> 'blame' on that comment yet.)  Even if it's correct, is there any reason
> not to add an SVN_ERR_ASSERT(directory[0]) there?

Sorry, that's not quite the issue.  directory[0] is almost certainly '/', and
that's a fundamental enough aspect of canonical paths that we shouldn't
need to assert it everywhere; but I'm still not certain what
.
    /* root nodes are covered anyway */
.
means.

Cheers,

Daniel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Re: [PATCH] A test for "Can't get entries" error

Julian Foad-5
In reply to this post by Daniel Shahaf-2
Daniel Shahaf wrote:
> Julian Foad wrote on Wed, 21 Nov 2018 16:00 +0000:
> > In reviewing the code I was unable to keep track of all the nuances of
> > what happens (and should happen) in all the edge cases. Especially when
> > 'flags & open_path_allow_null' is true and the requested path is a child
> > of a non-directory like the "/B/mu/iota" in this case: that combination
> > doesn't seem to be well documented, which makes me wonder what the
> > callers expect it to do.
>
> Have you read the docstring of the open_path_allow_null enumerator?

Maybe I was thinking of open_path_last_optional. "If FLAGS & open_path_last_optional is ... non-zero, require all the parent directories to exist as normal ..." when in this case the parent is not a directory.

--
- Julian
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-2
Julian Foad wrote on Fri, 23 Nov 2018 13:56 +0000:

> Daniel Shahaf wrote:
> > Julian Foad wrote on Wed, 21 Nov 2018 16:00 +0000:
> > > In reviewing the code I was unable to keep track of all the nuances of
> > > what happens (and should happen) in all the edge cases. Especially when
> > > 'flags & open_path_allow_null' is true and the requested path is a child
> > > of a non-directory like the "/B/mu/iota" in this case: that combination
> > > doesn't seem to be well documented, which makes me wonder what the
> > > callers expect it to do.
> >
> > Have you read the docstring of the open_path_allow_null enumerator?
>
> Maybe I was thinking of open_path_last_optional. "If FLAGS &
> open_path_last_optional is ... non-zero, require all the parent
> directories to exist as normal ..." when in this case the parent is not
> a directory.

Ah, yes, I see.  The docstring has a lacuna.

By code inspection, all three callsites that pass
open_path_last_optional expect the following postcondition:
.
    (*parent_path_p)->node != NULL || svn_fs_fs__dag_node_kind((*parent_path_p)->parent->node) == svn_node_dir
.
which agrees with open_path() of r2:/B/mu/iota returning
SVN_ERR_FS_NOT_DIRECTORY.

That said, that postcondition doesn't seem to be a good API: the two
possible outcomes are very different from each other, to the point that
every single caller branches on them and handles them differently.  (The
function does not "do one thing well".)  If a caller of open_path(open_path_last_optional)
forgets to check whether parent->node is NULL or not, we'll probably
have a bug (hopefully nothing worse than a segfault or an error from the
DAG layer).  I don't see why we couldn't remove open_path_allow_null
entirely and have callsites that pass it just call open_path(..., dirname(path), ...)
and do the existence check on the child's basename explicitly.  The case
that 'path' exists is an error anyway.

All that said, the callers' code does appear to be correct, in a
quick skim.

Cheers,

Daniel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Evgeny Kotkov
In reply to this post by Dmitry Pavlenko-2
Dmitry Pavlenko <[hidden email]> writes:

> I've run into an error: when performing 2 specially constructed updates one
> after another within the same session, SVN fails with
>
>   $ ./ra-test 15
>   svn_tests: E160016: Can't get entries of non-directory
>   XFAIL: ra-test 15: check that there's no "Can't get entries" error
>
> error. I believe these updates constructed that way are valid, so the problem
> is somewhere in FSFS code. It's also interesting that if these updates are
> run separately (e.g. by adding "if (FALSE)" to one or another), they succeed.

Apparently, this behavior is caused by a problem in the specific optimization
within the FSFS open_path() routine.

I constructed an FS regression test and committed it and the fix in:
   https://svn.apache.org/r1847572

(I'll try to nominate it for backport once I get some time.)


Thanks,
Evgeny Kotkov
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Julian Foad-5
Evgeny Kotkov wrote:
> Apparently, this behavior is caused by a problem in the specific optimization
> within the FSFS open_path() routine.
>
> I constructed an FS regression test and committed it and the fix in:
>    https://svn.apache.org/r1847572
>
> (I'll try to nominate it for backport once I get some time.)

Excellent! Thanks!

I had opened issue SVN-4791 for this. I have updated the log message and the issue to cross-reference each other.

https://issues.apache.org/jira/browse/SVN-4791

I expect we could improve the issue summary line which is currently "FSFS Can't get entries of non-directory".

I'm glad you included a FS-level test for it. In case anyone is interested, I attach a patch that backports the initial RA-level test to 1.8. 1.9 and 1.10.

I haven't reviewed or tested your r1847572 fix or test.

--
- Julian

svn-4791-initial-ra-level-test-backports-1.patch (25K) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Daniel Shahaf-2
Good morning Julian,

Julian Foad wrote on Tue, 27 Nov 2018 20:12 +0000:
> I'm glad you included a FS-level test for it. In case anyone is
> interested, I attach a patch that backports the initial RA-level test to
> 1.8. 1.9 and 1.10.

Any reason not to commit that one, too?

Cheers,

Daniel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Julian Foad-5
Julian Foad wrote:
> https://issues.apache.org/jira/browse/SVN-4791

This issue appears to be closely related to issue #SVN-4677. The fix applied in r1847572 is an extension of the fix applied in r1795116 for SVN-4677.

Reviewed, tested and nominated for backport to 1.11, 1.10, 1.9.

Daniel Shahaf wrote:
> Julian Foad wrote:
> > [...] I attach a patch that backports the initial RA-level test to 1.8. 1.9 and 1.10.
>
> Any reason not to commit that one, too?

No particular reason not to commit that, I suppose.

Really I'd prefer that we would put more effort into randomized testing. A single test could cover this particular sequence as well as many others.

In the absence of that ... Could do. No strong feeling. In one sense, extra regression tests are better ... but in another sense, more are clutter. I think in this case, as it seems the underlying cause is fixed and tested, I won't, but am fine with someone else doing so.

--
- Julian
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] A test for "Can't get entries" error

Evgeny Kotkov
In reply to this post by Daniel Shahaf-2
Daniel Shahaf <[hidden email]> writes:

> > I'm glad you included a FS-level test for it. In case anyone is
> > interested, I attach a patch that backports the initial RA-level test to
> > 1.8. 1.9 and 1.10.
>
> Any reason not to commit that one, too?

Personally, I think that having another test on the RA layer for this case
is unnecessary and would just increase the maintenance costs — since this
is an FS layer bug and given that we already have the (more or less) minimal
regression test in terms of the amount of the FS API calls.


Regards,
Evgeny Kotkov